Example No. 1
def matrix_diag_part(input_diagonal, input_help):
    """
    Calculate the batched diagonal part of a batched tensor.
    Note:
        input_help is a tensor with 1 on the diagonal and 0 elsewhere; its last two dimensions
        can be unequal.

    Args:
        input_diagonal (tvm.tensor.Tensor): Tensor of float32, float16, int32, int8, uint8. The last two dimensions
                                            can be unequal.
        input_help (tvm.tensor.Tensor): Tensor of float32, float16, int32, int8, uint8, and with a diagonal element of 1
                                        and other positions of 0.
    Returns:
        tvm.tensor.Tensor, has the same type as input_diagonal, the shape dims is equal to dims(input_diagonal) - 1.
    """
    dtype_input_diagonal = input_diagonal.dtype
    dtype_input_help = input_help.dtype

    utils.elemwise_shape_check(input_help.shape, input_diagonal.shape)

    if len(input_help.shape) < 2:
        raise ValueError("Only input tensors of rank >= 2 are supported!")

    utils.ops_dtype_check([dtype_input_diagonal, dtype_input_help], [
        utils.DtypeForDavinci.ALL_FLOAT, utils.DtypeForDavinci.INT8,
        utils.DtypeForDavinci.INT32, utils.DtypeForDavinci.UINT8
    ])
    res = matrix_diag_part_compute(input_diagonal, input_help)
    return res
Example No. 2
def Sin(x, target=utils.CCE):
    """
    Computes sine value of a tensor with Taylor's theorem.

    .. math::
        \\begin{array}{ll} \\\\
            sin(x) = x - \\frac{x^3}{3!} + \\frac{x^5}{5!} - ... +
                (-1)^k \\cdot \\frac{x^{2k+1}}{(2k+1)!}
        \\end{array}

    Args:
        x (tvm.tensor.Tensor): Tensor of type float16, float32.

    Returns:
        tvm.tensor.Tensor of same type and shape as x.
    
    Supported Platforms:
        'Ascend'
    """
    utils.ops_dtype_check(x.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    utils.check_shape(x.shape)

    use_call = True
    if use_call:
        return sin_call(x)
    return sin_compute(x)
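
A minimal NumPy sketch of the Taylor expansion referenced in the docstring, for illustration only (sin_taylor_reference is a hypothetical helper, not the AKG sin_call/sin_compute path):

import math
import numpy as np

def sin_taylor_reference(x, terms=10):
    """Illustrative only: sum the series sin(x) = sum_k (-1)^k * x^(2k+1) / (2k+1)!."""
    x = np.asarray(x, dtype=np.float32)
    result = np.zeros_like(x)
    for k in range(terms):
        result += (-1.0) ** k * x ** (2 * k + 1) / math.factorial(2 * k + 1)
    return result

# For moderate |x| this agrees closely with np.sin, e.g. sin_taylor_reference([0.5]) vs np.sin(0.5).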
Example No. 3
def reciprocal(data, high_precision=True, target=utils.CCE):
    """
    Computes the reciprocal of data element-wise.

    Args:
        data (tvm.tensor.Tensor): a tensor of type float16, float32.
        high_precision (bool): a bool value, whether to use high-precision version.

    Returns:
        tvm.tensor.Tensor of same type and shape as data.

    Supported Platforms:
        'Ascend', 'GPU'
    """
    utils.ops_dtype_check(data.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    shape = [x.value for x in data.shape]
    utils.check_shape(shape)

    res = akg.tvm.compute(
        shape,
        lambda *indice: akg.tvm.const(1, data.dtype) / data(*indice),
        name="res")

    # When product is mini, use the Newton iteration method to achieve higher precision.
    if product_is_mini() and high_precision:
        steps = 1
        for _ in range(steps):
            temp1 = data * res
            temp2 = temp1 * akg.tvm.const(-1, data.dtype)
            temp3 = temp2 + akg.tvm.const(2, data.dtype)
            res = temp3 * res

    return res
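
For reference, the loop above is one Newton step for the reciprocal, r_new = r * (2 - x * r); a minimal NumPy sketch of the same refinement (illustrative, not the Davinci kernel itself):

import numpy as np

def reciprocal_newton_reference(x, steps=1):
    """Refine an initial reciprocal estimate with r = r * (2 - x * r)."""
    x = np.asarray(x, dtype=np.float32)
    r = (1.0 / x).astype(np.float32)   # initial (possibly low-precision) estimate
    for _ in range(steps):
        r = r * (2.0 - x * r)          # same update as the temp1/temp2/temp3 chain above
    return r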
Example No. 4
def xdivy(data_x1, data_x2, target=utils.CCE):
    """
    Calculate data_x1 divided by data_x2.

    .. math::
        y = \\left\\{
            \\begin{aligned}
                0, && if \\quad x1 == 0 \\\\
                \\dfrac{x1}{x2}, && otherwise
            \\end{aligned}
        \\right.

    Args:
        data_x1 (tvm.tensor.Tensor): Tensor of dtype "float16" or "float32"
        data_x2 (tvm.tensor.Tensor): Tensor of dtype "float16" or "float32"

    Returns:
        tvm.tensor.Tensor
    """
    shape_x1 = get_shape(data_x1)
    shape_x2 = get_shape(data_x2)

    utils.check_shape(shape_x1)
    utils.check_shape(shape_x2)

    utils.elemwise_dtype_check(data_x1.dtype, data_x2.dtype)
    dtype = data_x1.dtype
    utils.ops_dtype_check(dtype, utils.DtypeForDavinci.ALL_FLOAT)

    return xdivy_compute(data_x1, data_x2)
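
A NumPy reference for the piecewise definition above (illustrative; xdivy_compute itself is not shown here):

import numpy as np

def xdivy_reference(x1, x2):
    """Return 0 where x1 == 0, and x1 / x2 elsewhere."""
    x1 = np.asarray(x1, dtype=np.float32)
    x2 = np.asarray(x2, dtype=np.float32)
    out = np.zeros_like(x1)
    np.divide(x1, x2, out=out, where=(x1 != 0))   # untouched entries keep the 0 from `out`
    return out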
Example No. 5
def pad(data, paddings, padtype, target="cce"):
    """add paddings to the tensor
    :data: The input tensor; currently only two-dimensional tensors are supported.
    :paddings: The paddings, with shape [N, 2] where N is the rank of the tensor.
     For each dimension D of input, paddings[D, 0] indicates how many values to add before
     the contents of tensor in that dimension, and paddings[D, 1] indicates how many values to
     add after the contents of tensor in that dimension.
    :dtype: The type of the input, float16, float32
    :padtype: One of "CONSTANT", "REFLECT", or "SYMMETRIC".
    """
    # check shape
    utils.check_shape(data.shape)
    # check types
    utils.ops_dtype_check(data.dtype, utils.DtypeForDavinci.ALL_TYPES)
    # check padding types
    ptype_checklist = ['constant']
    if not (padtype in ptype_checklist):
        raise RuntimeError("pad_cce only support %s while padtype is %s" % (",".join(ptype_checklist), padtype))

    dtype = data.dtype
    if dtype == 'int8' or dtype == 'uint8':
        data = Cast(data, "float16", target=target)

    rank = len(data.shape)
    pad_before = []
    pad_after = []
    for i in range(rank):
        pad_before.append(paddings[i][0])
        pad_after.append(paddings[i][1])
    B = tvm_pad(data, pad_before, pad_after=pad_after, name='B')

    if dtype == 'int8' or dtype == 'uint8':
        B = Cast(B, dtype, target=target)
    return B
Example No. 6
def flatten(x):
    """
    Reshape the input into (batch, c*h*w).

    Args:
        x (akg.tvm.tensor.Tensor): input tensor whose first dimension is the batch size.

    Returns:
       akg.tvm.tensor.Tensor
    """
    # check shape
    utils.check_shape(x)
    shape = get_shape(x)

    # check input tensor data_type
    utils.ops_dtype_check(x.dtype, [
        utils.DtypeForDavinci.ALL_FLOAT, utils.DtypeForDavinci.INT8,
        utils.DtypeForDavinci.INT16, utils.DtypeForDavinci.INT32,
        utils.DtypeForDavinci.INT64, utils.DtypeForDavinci.UINT8,
        utils.DtypeForDavinci.UINT16, utils.DtypeForDavinci.UINT32,
        utils.DtypeForDavinci.UINT64
    ])

    size = 1
    for i in range(1, len(shape)):
        size = size * shape[i]

    new_shape = [shape[0], size]
    res = akg.topi.reshape(x, new_shape)
    return res
Example No. 7
def sum_v2(inputs, axis=None, keepdims=True, target=utils.CCE):
    """
    Another implementation of sum using the topi API.

    Supported Platforms:
        'Ascend'
    """
    if target != utils.CCE:
        raise RuntimeError('operator not supported on %s' %
                           utils.get_backend(target))

    dtype = inputs.dtype
    utils.ops_dtype_check(dtype, utils.DtypeForDavinci.ALL_FLOAT)
    axis = ft_util.refine_reduce_axis(inputs, axis)
    utils.check_shape(inputs.shape)
    if not axis:
        output = akg.topi.identity(inputs)
    else:
        if dtype == "float16":
            step_sum = Cast(inputs, "float32", target)
        else:
            step_sum = inputs

        step_sum = akg.topi.sum(step_sum, axis=axis, keepdims=keepdims)

        if dtype == "float16":
            output = Cast(step_sum, "float16", target)
        else:
            output = step_sum
    return output
Example No. 8
def resize_nearest(input, output_shape):
    """
    Resize images using Nearest-neighbor interpolation.
    
    Args:
        input (tvm.tensor.Tensor): 4-D tensor of type float16 or float32 `("NHWC")`.
        output_shape (Union[tuple, list]): New size of image 4 integers `("NHWC")`.
    
    Note:
        The batch_num ("N") of input and output must be equal, and so must the channel_num ("C").
    
    Returns:
        tvm.tensor.Tensor, has the same type as `input`.
    """
    input_shape = get_shape(input)
    utils.check_shape(input, 4, "input")
    utils.check_shape(output_shape, 4, "output_shape")
    utils.ops_dtype_check(input.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    utils.check_equal("input batchsize", "output batchsize", input_shape[0], output_shape[0])
    utils.check_equal("input channel num", "output channel num", input_shape[3], output_shape[3])

    res = process_integer_scale(input, output_shape)
    if res is None:
        res = process_non_integer_scale(input, output_shape)
    return res
Example No. 9
def leaky_relu(data, negative_slop=0):
    """
    leaky_relu op for input tensor (N,C,H,W) OR (N,C1,H,W,C0).

    :math:`max(x, negative_slop*x)`

    Args:
        data (tvm.tensor.Tensor): tensor with type float16 or float32.
        negative_slop (float): 0<=negative_slop<1

    Returns:
        tvm.tensor.Tensor.
    """
    dtype = data.dtype
    utils.ops_dtype_check(dtype, utils.DtypeForDavinci.ALL_FLOAT)

    utils.check_shape(data.shape)

    if negative_slop >= 1 or negative_slop < 0:
        raise RuntimeError(
            "leaky_relu only supports negative_slop in [0, 1)")

    slop_tmp = akg.tvm.const(negative_slop, dtype=dtype)
    tmp = akg.lang.ascend.vmuls(data, slop_tmp)
    res = akg.lang.ascend.vmax(tmp, data)

    return res
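
The formula above is just an element-wise maximum; a NumPy equivalent for quick reference (illustrative only):

import numpy as np

def leaky_relu_reference(x, negative_slop=0.0):
    """max(x, negative_slop * x), element-wise."""
    x = np.asarray(x, dtype=np.float32)
    return np.maximum(x, negative_slop * x)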
Example No. 10
def logsoftmax_grad(Y, dY, axis):
    """
    Computes the back propagation gradients by chain rule.

    Args:
        Y: Tensor, holds the logsoftmax activation output.
        dY: Tensor, holds the initial gradients.
        axis: Integer, on which dimension the softmax is applied.

    Returns:
        Tensor, the overall gradients.
    """
    shape = [x.value for x in Y.shape]
    utils.check_shape(shape)
    dtype = Y.dtype
    utils.ops_dtype_check(dtype, utils.DtypeForDavinci.ALL_FLOAT)
    if axis == -1:
        axis = len(shape) + axis
    if axis >= len(shape):
        raise RuntimeError("axis should be less than dimension")
    if axis < -1:
        raise RuntimeError(
            "negative axis only support -1, please specify the axis in positive value"
        )

    softmax = akg.topi.exp(Y)
    dy_sum = akg.lang.ascend.sum(dY, axis=axis)
    dy_sum_broadcast = akg.lang.ascend.broadcast(dy_sum, shape)
    mul_result = akg.lang.ascend.vmul(softmax, dy_sum_broadcast)
    res = akg.lang.ascend.vsub(dY, mul_result)
    attrs = {"pragma_modshift": 1}
    return res, attrs
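
The chain rule applied above reduces to dX = dY - exp(Y) * sum(dY, axis), since exponentiating a log-softmax output recovers the softmax. A NumPy sketch for reference (illustrative only):

import numpy as np

def logsoftmax_grad_reference(Y, dY, axis):
    """dX = dY - softmax * sum(dY, axis), with softmax = exp(Y) for log-softmax outputs Y."""
    softmax = np.exp(Y)
    dy_sum = np.sum(dY, axis=axis, keepdims=True)   # broadcast back over the reduced axis
    return dY - softmax * dy_sum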
Example No. 11
def reduce_any_d(x, axis=None, keepdims=False):
    """
    Reduce a tensor on a certain axis based on max.

    Args:

        x (tvm.tensor.Tensor): The input tensor to reduce. Should be of type int8.
        axis (Union[list, tuple, int, None]): The dimensions to reduce. If None, all dimensions will be reduced;
                                              each dim must be in the range [-len(data.shape), len(data.shape) - 1].
        keepdims (Union[bool, None]): If True, retains reduced dimensions with length 1, defaults to False.

    Returns:
        tvm.tensor.Tensor of same type as input tensor x.
    """
    # check type
    utils.ops_dtype_check(x.dtype, utils.DtypeForDavinci.INT8)
    utils.check_shape(x.shape)
    # check axis
    utils.reduce_axis_check(x.shape, axis)
    refined_axis = refine_reduce_axis(x, axis)
    if len(set(refined_axis)) == len(x.shape) and not keepdims:
        keepdims = True
    res = _reduce_any_d_compute(x, refined_axis, keepdims)
    if len(set(refined_axis)) == len(x.shape):
        res = topi.reshape(res, (1, ))
    return res
Example No. 12
def Concat(data, axis, target=utils.CCE):
    """
    Concatenates data along the dimension set by axis.

    Args:
        data (Union[list, tuple]): list or tuple of tvm.tensor.Tensor of type float16, float32, int32, int8, uint8
        axis (int): Specifies the axis along which to concatenate. Must be in the range [-rank(data), rank(data))

    Returns:
        tvm.tensor.Tensor of same type as data.
    """

    data_size = len(data)
    if data_size < 1:
        raise RuntimeError("The size of data must be greater than or equal to 1")

    dtype = data[0].dtype
    utils.ops_dtype_check(dtype, utils.DtypeForDavinci.ALL_TYPES)

    shape_0 = data[0].shape
    utils.check_shape(shape_0)
    if axis < 0:
        axis += len(shape_0)

    for i in range(1, data_size):
        shape_i = data[i].shape
        utils.check_shape(shape_i)
        if len(shape_i) != len(shape_0):
            raise ValueError("Input tensors must have same dimensions.")

    res = akg.lang.ascend.concat(data, axis)
    return res
Example No. 13
def sum(inputs, axis=None, keepdims=False, target=utils.CCE):
    """
    Compute the sum of elements across dimensions of a tensor.

    Args:
        inputs (tvm.tensor.Tensor): Tensor.
        axis (Union[list, tuple, int, None]): If the list or tuple is empty, axis is treated as None.
        keepdims (bool): If keepdims is True, the rank of the result is the same as that of the input.

    Returns:
        tvm.tensor.Tensor, has same type as input. If keepdims is True, all reduced dimensions are retained
        with length 1, otherwise the reduced axes are eliminated.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    # Check types
    if target == utils.CCE:
        dtype = inputs.dtype
        utils.ops_dtype_check(dtype, utils.DtypeForDavinci.ALL_FLOAT)
    axis = ft_util.refine_reduce_axis(inputs, axis)
    utils.check_shape(inputs.shape)

    if not axis:
        output = akg.topi.identity(inputs)
    else:
        output = akg.topi.sum(inputs, axis=axis, keepdims=keepdims)
    return output
Example No. 14
def bitwise_xor(x1, x2, target=utils.CCE):
    """
    Computes the bitwise xor of `x1` and `x2`.

    Args:
        x1 (tvm.tensor.Tensor): Tensor of type int16, uint16.
        x2 (tvm.tensor.Tensor): Tensor of type int16, uint16.

    Returns:
        tvm.tensor.Tensor, has the same type as x1.
    """
    # check shape
    utils.check_shape(x1)
    utils.check_shape(x2)
    _, _, output_shape = produce_shapes(get_shape(x1), get_shape(x2))

    # check input tensor data_type
    utils.ops_dtype_check(
        [x1.dtype, x2.dtype],
        [utils.DtypeForDavinci.INT16, utils.DtypeForDavinci.UINT16])
    dtype = x1.dtype
    if dtype != x2.dtype:
        raise RuntimeError("input type must be same, but got %s vs %s" %
                           (dtype, x2.dtype))

    x1 = akg.topi.broadcast_to(x1, output_shape)
    x2 = akg.topi.broadcast_to(x2, output_shape)
    # x1 ^ x2 = (x1 | x2) & (~ (x1 & x2))
    res = akg.tvm.compute(
        output_shape, lambda *indice:
        (x1(*indice) | x2(*indice)) & (~(x1(*indice) & x2(*indice))))
    return res
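
The identity used by the compute above, x ^ y == (x | y) & ~(x & y), holds bitwise and can be checked quickly with NumPy:

import numpy as np

a = np.array([0, 1, 2, 3, -7], dtype=np.int16)
b = np.array([5, 1, 7, 0, 3], dtype=np.int16)
assert np.array_equal(a ^ b, (a | b) & ~(a & b))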
Example No. 15
def dropout_do_mask(data_tensor, data_mask, keep_prob):
    dtype = data_tensor.dtype
    shape_tensor = [x.value for x in data_tensor.shape]
    utils.ops_dtype_check(dtype, utils.DtypeForDavinci.ALL_FLOAT)
    utils.check_shape(shape_tensor)

    strides = [1]
    for x in reversed(shape_tensor):
        strides.append(strides[-1] * x)

    if keep_prob <= 0 or keep_prob > 1:
        raise RuntimeError("keep_prob must be in (0, 1]")

    keep_prob_const = akg.tvm.const(1.0 / keep_prob, dtype=dtype)
    data_scale_ub = akg.tvm.compute(
        shape_tensor,
        lambda *indices: data_tensor(*indices) * keep_prob_const,
        name='data_scale_ub')

    def get_index(indices):
        idx = 0
        for i in range(len(indices)):
            idx += indices[len(indices) - i - 1] * strides[i]
        return idx // 8

    if dtype == "float32":
        data_scale_ub_16 = akg.topi.cast(data_scale_ub, "float16")
        res_ub_16 = akg.tvm.compute(
            shape_tensor,
            lambda *indice: dav.dropout(data_mask[get_index(indice)], data_scale_ub_16(*indice)))
        res = akg.topi.cast(res_ub_16, "float32")
    else:
        res = akg.tvm.compute(shape_tensor, lambda *indice: dav.dropout(data_mask[get_index(indice)], data_scale_ub(*indice)))

    return res
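
get_index above linearizes the multi-dimensional index with the precomputed strides and divides by 8, because each byte of data_mask covers eight elements. A plain-Python sketch of the same arithmetic (hypothetical shapes, for illustration):

def linear_mask_index(indices, shape):
    """Row-major linear offset of `indices` in `shape`, then // 8 (8 elements per mask byte)."""
    strides = [1]
    for dim in reversed(shape):
        strides.append(strides[-1] * dim)
    idx = 0
    for i in range(len(indices)):
        idx += indices[len(indices) - i - 1] * strides[i]
    return idx // 8

# Element (1, 3) of a (2, 16) tensor has linear offset 1 * 16 + 3 = 19, i.e. mask byte 2.
assert linear_mask_index((1, 3), (2, 16)) == 2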
Example No. 16
def Addn(data, target=utils.CCE):
    """
    Compute the element-wise sum of a list of tensors.

    Args:
        data (Union[list, tuple]): List of tvm.tensor.Tensor of type float16, float32.

    Returns:
        tvm.tensor.Tensor, the element-wise sum of all input tensors.
    
    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    # check types
    dtype = data[0].dtype
    if target == utils.CCE:
        utils.ops_dtype_check(dtype, utils.DtypeForDavinci.ALL_FLOAT)

    res = data[0]
    for i in range(1, len(data)):
        utils.elemwise_dtype_check(res.dtype, data[i].dtype)
        utils.elemwise_shape_check(res.shape, data[i].shape)
    res = akg.topi.elemwise_sum(data)

    return res
Example No. 17
def truncate_div(input_x1, input_x2):
    """
    Calculate truncate_div of the inputs: res = floor(x1/x2) if x1/x2 > 0 else ceil(x1/x2), i.e. division truncated toward zero.

    Args:
        input_x1 (tvm.tensor.Tensor): Input tensor, support float16,
                                      float32 on mini device, while support
                                      int32, int8, uint8, float16, float32 on
                                      cloud ones.
        input_x2 (tvm.tensor.Tensor): Input tensor, with same dtype as input_x1.
    Returns:
        A tvm.tensor.Tensor as result of truncate_div.
    """
    utils.check_shape(get_shape(input_x1))
    utils.check_shape(get_shape(input_x2))
    utils.elemwise_dtype_check(input_x1.dtype, input_x2.dtype)
    utils.ops_dtype_check(
        input_x1.dtype,
        (utils.DtypeForDavinci.ALL_FLOAT) if product_is_mini() \
            else (utils.DtypeForDavinci.ALL_FLOAT,
                  utils.DtypeForDavinci.INT32,
                  utils.DtypeForDavinci.INT8,
                  utils.DtypeForDavinci.UINT8))

    return truncate_div_compute(input_x1, input_x2)
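
The behaviour described in the docstring is division truncated toward zero (floor for positive quotients, ceil for negative ones); a NumPy reference, for illustration only:

import numpy as np

def truncate_div_reference(x1, x2):
    """np.trunc of the quotient: floor(x1/x2) if x1/x2 > 0 else ceil(x1/x2)."""
    q = np.asarray(x1, dtype=np.float32) / np.asarray(x2, dtype=np.float32)
    return np.trunc(q)

# truncate_div_reference([7, -7], [2, 2]) -> [ 3., -3.]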
Example No. 18
def case_1(data_shape, dtype, kernel_name, attrs):
    """elemwise chain case 1"""
    utils.ops_dtype_check(dtype, utils.DtypeForDavinci.FLOAT16)
    utils.check_shape_length_equal("data", data_shape, 2)

    m, k = data_shape

    A = akg.tvm.placeholder((m, k), name='A', dtype=dtype)
    B = akg.tvm.placeholder((k, ), name='B', dtype=dtype)
    C = akg.tvm.placeholder((m, k), name='C', dtype=dtype)

    E = akg.tvm.compute((m, k),
                        lambda i, j: A[i, j] * (B[j] + C[i, j]),
                        name="E")

    forward_s = akg.tvm.create_schedule(E.op)
    op_vars = [A, B, C, E]
    akg.lower(forward_s, op_vars, simple_mode=True, polyhedral=True)

    kernel_name = gen_name_kernel(kernel_name, dtype, data_shape)

    with akg.build_config(add_lower_pass=debug_mode(0), dump_pass_ir=True):
        mod = akg.build(forward_s,
                        op_vars,
                        "cce",
                        name="test",
                        attrs=attrs,
                        polyhedral=True)
        return mod
Example No. 19
def mean_v2(data, axis=None, keepdims=False, target=utils.CCE):
    """
    Simple implementation of mean.

    Supported Platforms:
        'Ascend'
    """
    # Check types
    utils.ops_dtype_check(data.dtype, utils.DtypeForDavinci.ALL_FLOAT)

    # Check shape
    shape = [x.value for x in data.shape]
    utils.reduce_axis_check(shape, axis)
    axis = ft_util.refine_reduce_axis(data, axis)

    dtype = data.dtype
    count = 1
    for i in axis:
        count *= shape[i]

    count_rec = 1 / count
    output = sum_v2(data, axis, keepdims, target=target)
    res = output * akg.tvm.const(count_rec, dtype)
    attrs = get_attrs(data)
    if shape_is_dynamic(data):
        attrs["custom_tiling"] = mean_dynamic_tiling_strategy(data, axis)
    return res, attrs
Example No. 20
def apply_rms_prop_mixed_precision(var, ms, mom, grad, lr, momentum, rho,
                                   epsilon):
    """
    Mixed precision version for apply_rms_prop.

    Args:
        var (tvm.tensor.Tensor): The tensor to be updated. Should be float32.
        ms (tvm.tensor.Tensor): Mean square, a tensor of same shape and type as var.
        mom (tvm.tensor.Tensor): A tensor of same shape and type as var.
        grad (tvm.tensor.Tensor): A tensor of same shape and type as var.
        lr (tvm.tensor.Tensor): Learning rate, a scalar tensor of same type as var.
        momentum (float): Coefficient for calculate new mom, 0.0 <= momentum <= 1.0.
        rho (float): Coefficient for calculate new ms, 0.0 <= rho <= 1.0.
        epsilon (float): A small value to prevent division by 0.

    Returns:
        tvm.tensor.Tensor, Updated var of type float32.
        tvm.tensor.Tensor, Updated var of type float16.
        tvm.tensor.Tensor, Updated ms.
        tvm.tensor.Tensor, Updated mom.
        dict, binds info attrs for the in-place updated tensors.
    """

    utils.ops_dtype_check(var.dtype, utils.DtypeForDavinci.FLOAT32)
    _apply_rms_prop_check(var, ms, mom, grad, lr, momentum, rho, epsilon)

    out_var, out_var_fp16, out_ms, out_mom = _apply_rms_prop_mixed_precision_compute(
        var, ms, mom, grad, lr, momentum, rho, epsilon)
    out_var, binds_info = TensorUtils.inplace_set(var, out_var, "var_buf")
    out_ms, binds_info2 = TensorUtils.inplace_set(ms, out_ms, "ms_buf")
    out_mom, binds_info3 = TensorUtils.inplace_set(mom, out_mom, "mom_buf")
    binds_info.update(binds_info2)
    binds_info.update(binds_info3)
    attrs = {utils.BINDS: binds_info}
    return out_var, out_var_fp16, out_ms, out_mom, attrs
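
For orientation, a NumPy sketch of the conventional RMSProp update that a kernel with these arguments typically implements; this is an assumption for illustration only, since _apply_rms_prop_mixed_precision_compute is not shown here:

import numpy as np

def rms_prop_reference(var, ms, mom, grad, lr, momentum, rho, epsilon):
    """Assumed update: ms <- rho*ms + (1-rho)*grad^2; mom <- momentum*mom + lr*grad/sqrt(ms+eps); var <- var - mom."""
    ms_new = rho * ms + (1.0 - rho) * grad * grad
    mom_new = momentum * mom + lr * grad / np.sqrt(ms_new + epsilon)
    var_new = var - mom_new
    return var_new, var_new.astype(np.float16), ms_new, mom_new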
Example No. 21
def discontinous_mov(data, out_shape, target=utils.CCE):
    """
    Extract the elements at even indices from the original data and copy them into a tensor of shape
    (2, original_length / 2), duplicating the extracted row.

    Args:
        data (tvm.tensor.Tensor): Tensor of type float16, float32.
        out_shape (list): a list of output's shape.

    Returns:
           tvm.tensor.Tensor, has the same type as data, but it's shape changes to out_shape not data's shape.

    Example:
           if data = [1,2,3,4,5,6,7,8,9,10] then the output = [[1,3,5,7,9],[1,3,5,7,9]].
    """

    # check types
    utils.ops_dtype_check(data.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    shape = [x.value for x in data.shape]
    utils.check_shape(shape)

    output = akg.tvm.compute(out_shape,
                             lambda j, i: data[i * 2],
                             name="output")

    return output
Example No. 22
def matrix_diag(data, out_shape):
    """
    Generate a batched tensor whose value in diagonal lines are defined in `data`.

    Args:
        data (tvm.tensor.Tensor): A tensor of type float16, float32 or int32. Rank is L.
        out_shape (Union[list, tuple]): Output shape of length L + 1.
            The value of `out_shape[:-2]` should be equal to `data.shape[:-1]`.

    Returns:
        tvm.tensor.Tensor, has same type as "data", shape is "out_shape".
    """
    dtype = data.dtype
    utils.ops_dtype_check(dtype, [utils.DtypeForDavinci.ALL_FLOAT,
                                    utils.DtypeForDavinci.INT32])

    shape = get_shape(data)
    utils.check_shape(data)
    utils.check_shape(out_shape, length=len(shape) + 1)
    if tuple(shape[:-1]) != tuple(out_shape[:-2]):
        raise RuntimeError("The value of out_shape[:-2] should be equal to data.shape[:-1]")

    res = akg.tvm.compute(out_shape,
                          lambda *i: akg.tvm.if_then_else(akg.tvm.all(i[-1] == i[-2], i[-1] < shape[-1]),
                                                          data(*i[:-1]),
                                                          zero_const(dtype)),
                          name="diag")

    return res
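
The compute above writes data[..., k] at position (k, k) of the trailing two output dimensions and fills the rest with zeros; a NumPy sketch of the same semantics (illustrative only):

import numpy as np

def matrix_diag_reference(data, out_shape):
    """Place `data` on the diagonal of the last two dims of a zero tensor of shape `out_shape`."""
    data = np.asarray(data)
    out = np.zeros(out_shape, dtype=data.dtype)
    n = min(data.shape[-1], out_shape[-1], out_shape[-2])
    for k in range(n):
        out[..., k, k] = data[..., k]
    return out

# matrix_diag_reference([1, 2, 3], (4, 4)) yields a 4x4 matrix with 1, 2, 3 on its diagonal.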
Example No. 23
def sparse_softmax_cross_entropy_with_logits(labels, logits, reduction='mean'):
    """
    Computes sparse softmax cross entropy between `logits` and `labels`.

    Note:
        Softmax calculation of Logits is done inside the op.

    Args:
        labels (tvm.tensor.Tensor): int32 tensor of shape [batch_size].
                                    Each entry must be an index in `[0, num_classes)`.
        logits (tvm.tensor.Tensor): float32 or float16 tensor of shape [batch_size, num_classes].
        reduction (str): Specifies the reduction to apply to the output: 'none' or 'mean' or 'sum'. Default: 'mean'.
            'none': no reduction for the output
            'sum': the sum for the output
            'mean': the mean for the output.

    Returns:
        tvm.tensor.Tensor, has the same dtype as logits.
        If reduction is 'none', shape of the tensor is the same as logits,
        otherwise shape of the tensor is the same as labels.

    Supported Platforms:
        'Ascend'
    """
    utils.ops_dtype_check(logits.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    strategy, cost, _ = sparse_softmax_cross_entropy_with_logits_impl(
        labels, logits, reduction)
    attr_map = {"custom_tiling": strategy}
    return cost, attr_map
Example No. 24
def asinh(x, target=utils.CCE):
    r"""
    Compute asinh function.

    .. math:: asinh(x) = log(x+\sqrt{x*x+1})

    Args:
        x (tvm.tensor.Tensor): Tensor of type float16, float32. 

    Returns:
       tvm.tensor.Tensor, has the same type and shape as x.
    
    Supported Platforms:
        'Ascend'
    """
    # check shape
    utils.check_shape(x)

    # check input tensor data_type
    utils.ops_dtype_check(x.dtype, utils.DtypeForDavinci.ALL_FLOAT)
    dtype = x.dtype

    # Known that, asinh(x) = log(x + sqrt(x*x+1)), and, asinh(-x) = -asinh(x)
    # If x is a large negative number, (x + sqrt(x*x+1)) will be close to zero.
    # So, asinh(x) = sign(x) * log(|x| + sqrt(|x|*|x| + 1))
    compute_dtype = dtype
    if dtype == "float16":
        # To avoid overflow and higher accuracy, x is casted to float32
        compute_dtype = "float32"
        x = topi.cast(x, compute_dtype)

    x_abs = topi.abs(x)

    if product_is_mini():
        # sqrt(|x|*|x| + 1) = |x| * sqrt(1 + 1/(|x|*|x|))
        vsquare_add_one = topi.add(1,
                                   topi.divide(1, topi.multiply(x_abs, x_abs)))
        sqrt_compute_value = sqrt_mini_newton_iter_impl(vsquare_add_one)
        sqrt_value = topi.multiply(x_abs, sqrt_compute_value)
    else:
        x_abs_square_add_one = topi.add(topi.multiply(x_abs, x_abs), 1)
        sqrt_value = topi.sqrt(x_abs_square_add_one)

    x_add_sqrt = topi.add(x_abs, sqrt_value)

    if product_is_mini():
        log_value = log_compute_mini_impl(x_add_sqrt, target)
    else:
        log_value = topi.log(x_add_sqrt)

    res = topi.multiply(Sign(x, target), log_value)

    if res.dtype != dtype:
        res = topi.cast(res, dtype)

    if product_is_mini():
        attrs = {"enable_auto_inline": False}
        return res, attrs
    return res
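
The reformulation used above, asinh(x) = sign(x) * log(|x| + sqrt(x*x + 1)), can be checked against NumPy's arcsinh:

import numpy as np

x = np.array([-100.0, -1.5, 0.0, 2.0, 50.0], dtype=np.float32)
ref = np.sign(x) * np.log(np.abs(x) + np.sqrt(x * x + 1.0))
assert np.allclose(ref, np.arcsinh(x))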
Example No. 25
def quantize_chk_cfg_and_gen_outdtype(quant_algo, scale_mode, scale_sqrt,
                                      qdrtensors):
    """check all the params is valid, and general output dtype"""
    # check quantize algorithm and quantize scale type
    if quant_algo is None:
        # quantize switch off
        if scale_mode is not None or scale_sqrt is not None \
                or qdrtensors is not None:
            raise RuntimeError("Invalid Quantize Config.")
        out_dtype = "float16"
        return out_dtype

    # quantize switch on, all quantize params should not be None
    if scale_mode is None or scale_sqrt is None \
            or qdrtensors is None:
        raise RuntimeError("Invalid Quantize Config!")

    if len(quant_algo) != 2 or any([i not in [0, 1] for i in quant_algo]):
        raise RuntimeError("Invalid Quantize Config!!!")

    # check quantize algorithm
    if quant_algo[0] not in (0, 1):
        raise RuntimeError("Quantize algorithm just support 0 for non "
                           "offset and 1 for half offset, but get {}."
                           "".format(quant_algo[0]))

    # check quantize scale type
    if quant_algo[1] != 0:
        raise RuntimeError("Quantize scale only support SCALAR now.")

    # non offset get int8, half offset get uint8
    out_dtype = "int8" if quant_algo[0] == 0 else "uint8"

    if scale_mode not in (0, 1, 2):
        raise ValueError("Invalid scale mode, just support '0,1,2' but get "
                         "{}!".format(scale_mode))
    # now scale mode limit
    if scale_mode in (0, 1):
        raise RuntimeError("quantized_avg_pool just support requantize now!")

    # check scale method
    if scale_sqrt not in (0, 1):
        raise RuntimeError("Invalid scale moethod!")

    # scalar scale type for now
    if len(qdrtensors) != 2:
        raise RuntimeError("qdrtensors should contain two tensors for scale "
                           "and offset!")
    if get_shape(qdrtensors[0]) != [1] or get_shape(qdrtensors[1]) != [1]:
        raise RuntimeError("Scale for dequantize or requantize only "
                           "support scalar tensor.")
    utils.ops_dtype_check(qdrtensors[0].dtype, utils.DtypeForDavinci.FLOAT16)
    utils.ops_dtype_check(qdrtensors[1].dtype, utils.DtypeForDavinci.FLOAT16)
    #  utils.ops_dtype_check(qdrtensors[0].dtype,
    #                          utils.DtypeForDavinci.ALL_FLOAT)
    #  utils.ops_dtype_check(qdrtensors[1].dtype,
    #                          utils.DtypeForDavinci.ALL_FLOAT)

    return out_dtype
Example No. 26
def fused_bn2(mean, var_part, running_mean, running_var, momentum=0.8):
    """
    Calculating mean, variance and update running variables.

    Read fused_bn1 docs for details.

    Note:
        Apply reduction of 'N' axis to calculating mean and variance.

    Args:
        mean (tvm.tensor.Tensor): Tensor of type float32 as mean.
        var_part (tvm.tensor.Tensor): Tensor of type float32, intermediate
                                      variables for variance.
        running_mean (tvm.tensor.Tensor): Tensor of type float32 as trained
                                          mean used in inference stage.
        running_var (tvm.tensor.Tensor): Tensor of type float32 as trained
                                         variance used in inference stage.
        momentum (float): A float number used for updating running values,
                          must meet condition '0.0 < momentum < 1.0'.

    Returns:
        variance (tvm.tensor.Tensor): A float32 tensor as data's variance.
        running_mean_updated (tvm.tensor.Tensor): A float32 tensor as updated
                                                  running_mean (updated inplace).
        running_var_updated (tvm.tensor.Tensor): A float32 tensor, updated
                                                 running_var (updated inplace).
    """
    utils.ops_dtype_check([mean.dtype, var_part.dtype],
                          utils.DtypeForDavinci.FLOAT32)

    dim_info, _ = bn2_set_dim_func(mean, var_part, running_mean, running_var,
                                   momentum)
    attrs = {**ATTR_MAP_BN2}

    in_tensors = (var_part, mean, running_mean, running_var)

    sub_mean_square = akg.tvm.compute(
        mean.shape,
        lambda *i: akg.tvm.const(-1.0, dtype=mean.dtype) * mean(*i) * mean(*i),
        name="sub_mean_square")
    variance = akg.tvm.compute(mean.shape,
                               lambda *i: var_part(*i) + sub_mean_square(*i),
                               name="variance")

    # update running mean and variance
    running_mean_updated = \
        update_by_moving_average(running_mean, mean, momentum)
    running_var_updated = \
        update_by_moving_average(running_var, variance, momentum)

    out_tensors = (variance, running_mean_updated, running_var_updated)
    tensors_and_binds = inplace_operate_bind(in_tensors, out_tensors,
                                             ((2, 1), (3, 2)))
    out_tensors = tensors_and_binds[0]
    attrs[kernel_exec.BINDS] = tensors_and_binds[1]

    if dim_info != "":
        attrs["dim"] = dim_info
    return (*out_tensors, attrs)
Example No. 27
def _div_ascend(data1, data2):
    """
    Calculates x/y, and returns an integer when inputs are all integers.

    When both arguments are integers, use integer division (also known as "floor division").
    When arguments are float numbers, use normal floating point division

    Note:
        div supports broadcasting.

    Args:
        data1 (tvm.tensor.Tensor): Tensor of type float16, float32, int32, int8 and uint8.
        data2 (tvm.tensor.Tensor): Tensor of type float16, float32, int32, int8 and uint8.

    Returns:
        tvm.tensor.Tensor, has the same type as data1 and data2.
    """

    utils.ops_dtype_check([data1.dtype, data2.dtype],
                          utils.DtypeForDavinci.ALL_TYPES)
    utils.elemwise_dtype_check(data1.dtype, data2.dtype)
    dtype = data1.dtype

    shape1 = [x.value for x in data1.shape]
    shape2 = [x.value for x in data2.shape]
    utils.check_shape(shape1)
    utils.check_shape(shape2)

    utils.auto_broadcast_check(shape1, shape2)
    n_shape1, n_shape2, out_shape = produce_shapes(shape1, shape2)
    if n_shape1 != out_shape:
        input1_cast = akg.topi.broadcast_to(data1, out_shape)
    else:
        input1_cast = data1
    if n_shape2 != out_shape:
        input2_cast = akg.topi.broadcast_to(data2, out_shape)
    else:
        input2_cast = data2

    if dtype in ("int32", "int8", "uint8"):
        input1p = Cast(input1_cast, "float16", utils.CCE)
        input2p = Cast(input2_cast, "float16", utils.CCE)
    else:
        input1p = input1_cast
        input2p = input2_cast

    if product_is_mini():
        input2p_rec = reciprocal(input2p, target=utils.CCE)
        res = akg.topi.multiply(input1p, input2p_rec)
    else:
        res = akg.topi.divide(input1p, input2p)

    if dtype in ("int8", "uint8"):
        res = floor(res, utils.CCE)
        res = Cast(res, "float16", utils.CCE)
    if dtype in ("int32", "int8", "uint8"):
        res = Cast(res, dtype, utils.CCE)

    return res
Example No. 28
def check_op1(*args):
    dy, data, mean = args
    utils.ops_dtype_check([dy.dtype, data.dtype],
                          utils.DtypeForDavinci.ALL_FLOAT)
    utils.ops_dtype_check(mean.dtype, utils.DtypeForDavinci.FLOAT32)
    shape_nc1hwc0 = get_shape(dy)
    check_shape("NC1HWC0", data, shape_nc1hwc0, "data")
    check_shape("C1C0", mean, shape_nc1hwc0, "mean")
    return shape_nc1hwc0
Example No. 29
def fill(shape, value, dtype, target="cce"):
    """Create a tensor of the given shape filled with a constant value."""
    utils.ops_dtype_check(
        dtype, [utils.DtypeForDavinci.FLOAT16, utils.DtypeForDavinci.INT32])
    utils.check_shape(shape)

    A = akg.tvm.const(value, dtype)
    res = akg.tvm.compute(shape, lambda *i: A, name="fill")
    return res
Example No. 30
def matmul(x,
           y,
           b,
           out_dtype,
           left_format="zZ",
           right_format="nZ",
           out_format="zN",
           transpose_x=False,
           transpose_y=False,
           attrs=None,
           target=utils.CCE):
    """
    Computes matrix multiplication x * y + b.

    Args:
        x: akg.tvm.Tensor of type int8, uint8, float16, float32, int32. Left matrix.
        y: akg.tvm.Tensor of same type as x. Right matrix.
        b: akg.tvm.Tensor of same type as x. Bias tensor.
        out_dtype: str. Data type of output tensor.
        left_format: str. Data format of left matrix. Supported data format list ["zZ", "nZ", "zN"].
        right_format: str. Data format of right matrix. Supported data format list ["zZ", "nZ", "zN"].
        out_format: str. Data format of output tensor. Supported data format list ["zZ", "nZ", "zN"].
        transpose_x: Boolean. Specifies whether x is transposed or not.
        transpose_y: Boolean. Specifies whether y is transposed or not.
        attrs: Dict. Used in matmul computation.

    Note:
        Before calling matmul, the inputs need to be converted from 2D to Fractal format.

    Returns:
        akg.tvm.Tensor with type out_dtype.

    Supported Platforms:
        'Ascend'
    """
    utils.ops_dtype_check([x.dtype, y.dtype], utils.DtypeForDavinci.ALL_FLOAT)
    shape_x = [shape_element.value for shape_element in x.shape]
    utils.check_shape(shape_x)
    shape_y = [shape_element.value for shape_element in y.shape]
    utils.check_shape(shape_y)
    if left_format not in ["zZ", "zN"]:
        raise ValueError("unsupported left_format: %s" % left_format)
    if right_format not in ["nZ", "zZ", "zN"]:
        raise ValueError("unsupported right_format: %s" % right_format)
    if out_format not in ["zN", "zZ"]:
        raise ValueError("unsupported out_format: %s" % out_format)

    out = matmul4d_compute(x, y, b, out_dtype, left_format, right_format,
                           out_format, transpose_x, transpose_y, attrs)
    attr_map = {"pragma_rmselfdep": False}

    dims_info, _ = matmul_set_dim(x, y, b, out_dtype, left_format,
                                  right_format, out_format, transpose_x,
                                  transpose_y)
    attr_map["dim"] = dims_info

    return out, attr_map