def Addn(data, target=utils.CCE):
    """
    Compute the elementwise sum of a list of tensors.

    Args:
        data (list[tvm.tensor.Tensor]): List of tensors of type float16, float32,
            all with the same shape.

    Returns:
        tvm.tensor.Tensor, the elementwise sum of all input tensors.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    # check types
    dtype = data[0].dtype
    if target == utils.CCE:
        utils.ops_dtype_check(dtype, utils.DtypeForDavinci.ALL_FLOAT)

    res = data[0]
    for i in range(1, len(data)):
        utils.elemwise_dtype_check(res.dtype, data[i].dtype)
        utils.elemwise_shape_check(res.shape, data[i].shape)
    res = akg.topi.elemwise_sum(data)

    return res

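# Usage sketch for Addn (illustrative only, not part of the original source;
# assumes a working akg/TVM build where akg.tvm.placeholder is available):
#
#   a = akg.tvm.placeholder((8, 8), name="a", dtype="float32")
#   b = akg.tvm.placeholder((8, 8), name="b", dtype="float32")
#   s = Addn([a, b], target=utils.CUDA)  # s[i, j] == a[i, j] + b[i, j]
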
def ExpandDims(data, axis, target=utils.CCE):
    """
    Insert a new dimension of length 1 into the shape of data at position axis.

    Args:
        data (tvm.tensor.Tensor): Tensor.
        axis (int): The position at which the new dimension is inserted.

    Returns:
        tvm.tensor.Tensor, data with an extra dimension of length 1.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    utils.check_shape(data.shape)
    if target == utils.CCE:
        utils.ops_dtype_check(
            data.dtype,
            [utils.DtypeForDavinci.ALL_FLOAT, utils.DtypeForDavinci.INT32])
        res = akg.topi.expand_dims(data, axis, 1)
    else:
        res = akg.topi.expand_dims(data, axis)
    return res

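# Usage sketch for ExpandDims (illustrative only; shapes are assumptions):
#
#   x = akg.tvm.placeholder((3, 4), name="x", dtype="float32")
#   y = ExpandDims(x, 1, target=utils.CUDA)  # y has shape (3, 1, 4)
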
def maximum(data1, data2, target=utils.CCE):
    """
    Take the element-wise maximum of two tensors with auto-broadcasting.

    Args:
        data1 (tvm.tensor.Tensor): Tensor.
        data2 (tvm.tensor.Tensor): Tensor of same type as data1.

    Returns:
        tvm.tensor.Tensor, the element-wise maximum of the two tensors.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    shape1 = [x.value for x in data1.shape]
    shape2 = [x.value for x in data2.shape]
    utils.check_shape(shape1)
    utils.check_shape(shape2)
    utils.auto_broadcast_check(shape1, shape2)
    utils.elemwise_dtype_check(data1.dtype, data2.dtype)
    dtype = data1.dtype

    # On Ascend, int8/uint8 are computed in float16 and cast back afterwards.
    need_cast = target == utils.CCE and dtype in ["int8", "uint8"]
    if need_cast:
        data1 = Cast(data1, "float16", target)
        data2 = Cast(data2, "float16", target)
    res = topi.maximum(data1, data2)
    if need_cast:
        res = Cast(res, dtype, target)
    return res

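# Usage sketch for maximum, showing auto-broadcasting (illustrative only):
#
#   x = akg.tvm.placeholder((4, 4), name="x", dtype="float16")
#   y = akg.tvm.placeholder((1, 4), name="y", dtype="float16")
#   z = maximum(x, y, target=utils.CUDA)  # y is broadcast along the first axis
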
def reduce_sum(inputs, axis=None, keepdims=False, target=utils.CCE):
    """
    Compute the sum of elements across dimensions of a tensor.

    Args:
        inputs (tvm.tensor.Tensor): Tensor.
        axis (Union[list, tuple, int, None]): The dimensions to reduce.
            An empty list or tuple is treated the same as None (reduce all axes).
        keepdims (bool): If True, the result keeps the same rank as the input.

    Returns:
        tvm.tensor.Tensor, has same type as input. If keepdims is True, all reduced
        dimensions are retained with length 1; otherwise the reduced axes are eliminated.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    if target == utils.CCE:
        return ascend_sum(inputs, axis, keepdims)
    axis = refine_reduce_axis(inputs, axis)
    utils.check_shape(inputs.shape)

    # Accumulate float16 inputs in float32 for better numerical accuracy.
    in_dtype = inputs.dtype
    if in_dtype == 'float16':
        inputs = akg.topi.cast(inputs, 'float32')

    output = akg.topi.sum(inputs, axis=axis, keepdims=keepdims)
    if in_dtype == 'float16':
        output = akg.topi.cast(output, 'float16')
    return output

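# Usage sketch for reduce_sum (illustrative only). On non-Ascend targets,
# float16 inputs are accumulated in float32 and cast back, as above:
#
#   x = akg.tvm.placeholder((32, 16), name="x", dtype="float16")
#   s = reduce_sum(x, axis=1, keepdims=True, target=utils.CUDA)  # shape (32, 1)
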
def round_(data, target=utils.CCE):
    """
    Round elements of x to the nearest integer.

    Args:
        data (tvm.tensor.Tensor): Tensor of type float16, float32, int8, uint8, int32.

    Returns:
        tvm.tensor.Tensor of same type and shape as data.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    utils.check_shape(data.shape)
    in_type = data.dtype
    if target == utils.CCE:
        if in_type != 'float16':
            data = akg.tvm.compute(data.shape,
                                   lambda *i: data(*i).astype("float16"),
                                   name="data_f16")
        return akg.lang.ascend.round(data)
    if in_type == 'float16':
        data = akg.topi.cast(data, 'float32')
    output = akg.topi.round(data)
    if in_type == 'float16':
        output = akg.topi.cast(output, 'float16')
    return output

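# Usage sketch for round_ (illustrative only):
#
#   x = akg.tvm.placeholder((10,), name="x", dtype="float32")
#   r = round_(x, target=utils.CUDA)  # elements rounded to the nearest integer
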
def neg(data, target=utils.CCE):
    """
    Computes the negative value of the input tensor.

    Args:
        data (tvm.tensor.Tensor): Tensor of type float16, float32, int32.

    Returns:
        tvm.tensor.Tensor of same type and shape as the input tensor data.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    utils.check_shape(data.shape)

    if target == utils.CCE:
        data_type = data.dtype
        utils.ops_dtype_check(
            data_type,
            [utils.DtypeForDavinci.ALL_FLOAT, utils.DtypeForDavinci.INT32])
        neg_one = akg.tvm.const(-1.0, dtype=data_type)
        res = akg.lang.ascend.vmuls(data, neg_one)
        if data_type == "int32":
            res = akg.topi.cast(res, "int32")
    else:
        res = akg.topi.negative(data)
    return res

def sqrt(data, target=utils.CUDA):
    """
    Computes the square root of data element-wise.

    Args:
        data (tvm.tensor.Tensor): Tensor of type float16, float32.

    Returns:
        tvm.tensor.Tensor, has same type and shape as data.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    if target == utils.CCE:
        return _sqrt_ascend(data)
    check_list = ["float16", "float32"]
    dtype = data.dtype
    if dtype not in check_list:
        raise RuntimeError("sqrt only supports %s while dtype is %s" % (
            ",".join(check_list), dtype))
    shape = [x.value for x in data.shape]
    utils.check_shape(shape)
    res = akg.topi.sqrt(data)
    return res

def Divide(lhs, rhs, target=utils.CCE):
    """
    Calculate lhs divided by rhs element-wise, with auto-broadcasting.

    Args:
        lhs (tvm.tensor.Tensor): The left tensor.
        rhs (tvm.tensor.Tensor): The right tensor.

    Returns:
        tvm.tensor.Tensor.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    if target == utils.CCE:
        return _div_ascend(lhs, rhs)
    shape_l = [x.value for x in lhs.shape]
    shape_r = [x.value for x in rhs.shape]
    utils.check_shape(shape_l)
    utils.check_shape(shape_r)
    utils.auto_broadcast_check(shape_l, shape_r)
    utils.elemwise_dtype_check(lhs.dtype, rhs.dtype)
    output = akg.topi.divide(lhs, rhs)
    return output

def mul(l_input, r_input, target=utils.CCE):
    """
    Calculate x * y element-wise.

    Note:
        mul supports broadcasting.

    Args:
        l_input (tvm.tensor.Tensor): Tensor of type float16, float32.
        r_input (tvm.tensor.Tensor): Tensor of type float16, float32.

    Returns:
        tvm.tensor.Tensor, has the same type as l_input and r_input.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    utils.ops_dtype_check([l_input.dtype, r_input.dtype], utils.DtypeForDavinci.ALL_FLOAT)
    shape1 = [x.value for x in l_input.shape]
    shape2 = [x.value for x in r_input.shape]
    utils.check_shape(shape1)
    utils.check_shape(shape2)
    utils.auto_broadcast_check(shape1, shape2)
    utils.elemwise_dtype_check(l_input.dtype, r_input.dtype)
    output = akg.topi.multiply(l_input, r_input)
    return output

def less(data1, data2, target=utils.CCE):
    """
    Compute data1 < data2 element-wise.

    Args:
        data1 (tvm.tensor.Tensor): Tensor of type float16, float32 or int32.
        data2 (tvm.tensor.Tensor): Tensor of type float16, float32 or int32.

    Returns:
        tvm.tensor.Tensor. True where data1 is less than data2, else False.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    utils.check_shape(data1.shape)
    utils.check_shape(data2.shape)

    # check types
    if target == utils.CCE:
        utils.elemwise_dtype_check(
            data1.dtype, data2.dtype,
            [utils.DtypeForDavinci.ALL_FLOAT, utils.DtypeForDavinci.INT32])
        # check runtime mode, and change dtype accordingly
        if product_is_mini() and data1.dtype != "float16":
            data1 = akg.topi.cast(data1, "float16")
            data2 = akg.topi.cast(data2, "float16")
        if (not product_is_mini()) and data1.dtype == "int32":
            data1 = akg.topi.cast(data1, "float32")
            data2 = akg.topi.cast(data2, "float32")
    res = akg.topi.less(data1, data2)
    return res

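# Usage sketch for less (illustrative only); the result is a boolean tensor:
#
#   a = akg.tvm.placeholder((5,), name="a", dtype="float32")
#   b = akg.tvm.placeholder((5,), name="b", dtype="float32")
#   m = less(a, b, target=utils.CUDA)  # m[i] == (a[i] < b[i])
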
def greater_equal(data1, data2, target=utils.CCE):
    """
    Check whether data1 is greater than or equal to data2.

    Args:
        data1 (tvm.tensor.Tensor): Tensor.
        data2 (tvm.tensor.Tensor): Tensor.

    Returns:
        tvm.tensor.Tensor. True where data1 is greater than or equal to data2, else False.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    # check shapes
    shape1 = [x.value for x in data1.shape]
    shape2 = [x.value for x in data2.shape]
    for shape in [shape1, shape2]:
        utils.check_shape(shape)

    # check types
    dtype = data1.dtype
    dtype2 = data2.dtype
    utils.elemwise_dtype_check(dtype, dtype2)
    if target == utils.CCE:
        utils.ops_dtype_check(dtype, utils.DtypeForDavinci.FLOAT16)

    res = akg.topi.greater_equal(data1, data2)
    return res

def logical_not(input1, target=utils.CCE):
    """
    Compute logical_not of input1.

    Args:
        input1 (tvm.tensor.Tensor): Tensor.

    Returns:
        tvm.tensor.Tensor.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    res = akg.topi.logical_not(input1)
    return res

def Exp(data, target=utils.CCE):
    """
    Calculate the exponential of the input data.

    Args:
        data (tvm.tensor.Tensor): Tensor.

    Returns:
        tvm.tensor.Tensor, has the same type as data.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    if target == utils.CCE:
        return _exp_ascend(data)
    else:
        return _exp(data)

def rsqrt(data, target=utils.CCE):
    """
    Computes the reciprocal of the square root of data element-wise.

    Args:
        data (tvm.tensor.Tensor): Tensor.

    Returns:
        tvm.tensor.Tensor, inverse square root of data, with same type as the input tensor.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    if target == utils.CCE:
        return _rsqrt_ascend(data)
    else:
        return _rsqrt(data)

def AbsSum(inputs, axis=None, keepdims=False, target=utils.CCE):
    """
    Computes the sum of absolute values of the input tensor along the given axis.

    Args:
        inputs (tvm.tensor.Tensor): The input tensor.
        axis (int): Specifies the dimensions to reduce when performing the sum operation.
        keepdims (bool): If True, retains reduced dimensions with length 1. Default: False.

    Returns:
        An akg.tvm.tensor.Tensor of same type as inputs.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    input_abs = Abs(inputs, target)
    return sum(input_abs, axis, keepdims, target=target)

def log(data, target=utils.CCE):
    """
    Computes log(data) element-wise.

    Args:
        data (tvm.tensor.Tensor): Tensor.

    Returns:
        tvm.tensor.Tensor, with same type as the input tensor.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    if target == utils.CCE:
        return _log_ascend(data)
    else:
        return _log(data)

def not_equal(data1, data2, target=utils.CCE):
    """
    Check whether data1 is not equal to data2.

    Args:
        data1 (tvm.tensor.Tensor): Tensor.
        data2 (tvm.tensor.Tensor): Tensor.

    Returns:
        tvm.tensor.Tensor. True where data1 is not equal to data2, else False.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    if target == utils.CCE:
        return _not_equal_ascend(data1, data2)
    else:
        return _not_equal(data1, data2)

def Add(data1, data2, scale=1.0, polyhedral=True, attrs=None, target=utils.CCE):
    """
    Computes data1 + data2 elementwise; broadcast is supported.

    Args:
        data1 (tvm.tensor.Tensor): Tensor.
        data2 (tvm.tensor.Tensor): Tensor of same type as data1; if shape(data2) != shape(data1),
            broadcast will happen.

    Returns:
        tvm.tensor.Tensor, add result, with same type as input tensors and broadcasted shape of
        data1 and data2.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    # Avoid a mutable default argument for attrs.
    if attrs is None:
        attrs = {}
    if target == utils.CCE:
        return _add_ascend(data1, data2, scale, polyhedral, attrs)
    else:
        return _add(data1, data2)

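# Usage sketch for Add with broadcasting (illustrative only; scale, polyhedral
# and attrs keep their defaults here and only affect the Ascend path):
#
#   a = akg.tvm.placeholder((2, 3), name="a", dtype="float32")
#   b = akg.tvm.placeholder((3,), name="b", dtype="float32")
#   c = Add(a, b, target=utils.CUDA)  # b is broadcast over the first axis of a
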
def Cast(data, dst_type, target=utils.CCE):
    """
    Cast data to the target type.

    Args:
        data (tvm.tensor.Tensor): Tensor to be cast.
        dst_type (str): Target cast type.

    Returns:
        tvm.tensor.Tensor, of type dst_type.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    if target == utils.CCE:
        return _cast_ascend(data, dst_type)
    else:
        return _cast(data, dst_type)

def less_equal(data1, data2, target=utils.CCE):
    """
    Check whether data1 is less than or equal to data2.

    Args:
        data1 (tvm.tensor.Tensor): Tensor.
        data2 (tvm.tensor.Tensor): Tensor.

    Returns:
        tvm.tensor.Tensor. True where data1 is less than or equal to data2, else False.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    if target == utils.CCE:
        return _less_equal_ascend(data1, data2)
    else:
        return _less_equal(data1, data2)

def pow_(data1, data2, target=utils.CCE):
    """
    Computes power(data1, data2) elementwise; broadcast is supported.

    Args:
        data1 (tvm.tensor.Tensor): Tensor.
        data2 (tvm.tensor.Tensor): Tensor of same type as data1; if shape(data2) != shape(data1),
            broadcast will happen.

    Returns:
        tvm.tensor.Tensor, powered result, with same type as input tensors and broadcasted shape
        of data1 and data2.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    if target == utils.CCE:
        return _pow_ascend(data1, data2, target)
    return _pow(data1, data2)

def reduce_min(inputs, axis=None, keepdims=False, target=utils.CCE):
    """
    Compute the min of elements across dimensions of a tensor.

    Args:
        inputs (tvm.tensor.Tensor): Tensor.
        axis (Union[list, tuple, int, None]): The dimensions to reduce.
            An empty list or tuple is treated the same as None (reduce all axes).
        keepdims (bool): If True, the result keeps the same rank as the input.

    Returns:
        tvm.tensor.Tensor, has same type as input. If keepdims is True, all reduced
        dimensions are retained with length 1; otherwise the reduced axes are eliminated.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    if target == utils.CCE:
        return _reduce_min_max_ascend(inputs, axis, keepdims, "min")
    return _reduce_min(inputs, axis, keepdims)

def reduce_prod(data, axis=None, keepdims=False, target=utils.CCE):
    """
    Computes the product of elements along a specific axis.

    Args:
        data (tvm.tensor.Tensor): The input tensor.
        axis (Union[list, tuple, int, None]): The dimensions to reduce at.
            If None, all dimensions will be reduced.
        keepdims (Union[bool, None]): If True, keeps the reduced dimensions with length 1.

    Returns:
        Tensor, the product of elements of the input tensor.

    Supported Platforms:
        'Ascend', 'GPU'
    """
    utils.check_supported_target(target)
    shape = [x.value for x in data.shape]
    utils.ops_dtype_check(data.dtype, [utils.DtypeForDavinci.ALL_FLOAT,
                                       utils.DtypeForDavinci.INT8,
                                       utils.DtypeForDavinci.UINT8])

    if axis is None and keepdims is False:
        raise ValueError("keepdims must be True when axis is None!")

    axis_new = refine_reduce_axis(data, axis)

    if target == utils.CUDA:
        return akg.topi.prod(data, axis=axis, keepdims=keepdims)

    utils.check_shape(shape)
    dtype = data.dtype

    # The product is computed as exp(sum(log(x))); int8/uint8 inputs are first
    # cast to float16 and the result is cast back afterwards.
    if dtype in ["int8", "uint8"]:
        data = akg.topi.cast(data, "float16")
    vlog_t = log(data, target)
    res = akg.topi.sum(vlog_t, axis=axis_new, keepdims=keepdims)
    res = Exp(res, target)

    if dtype in ["int8", "uint8"]:
        res = akg.topi.cast(res, dtype)
    return res

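# Usage sketch for reduce_prod (illustrative only). Note the Ascend path
# rewrites the product as exp(sum(log(x))), which assumes positive inputs:
#
#   x = akg.tvm.placeholder((4, 8), name="x", dtype="float32")
#   p = reduce_prod(x, axis=1, keepdims=True, target=utils.CUDA)  # shape (4, 1)
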
def transpose(data, axes, target=utils.CCE):
    """
    Permute the dimensions of the input data.

    Args:
        data (tvm.tensor.Tensor): Tensor.
        axes (Union[list, tuple]): Elements must be int. The permutation of the dimensions.

    Returns:
        tvm.tensor.Tensor, has the same dtype as data.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    if target != utils.CCE:
        utils.check_shape(data.shape)
        utils.check_int_list(axes, "axes")
        return akg.topi.transpose(data, axes)
    else:
        return _transpose_ascend(data, axes)

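# Usage sketch for transpose (illustrative only):
#
#   x = akg.tvm.placeholder((2, 3, 4), name="x", dtype="float32")
#   y = transpose(x, (2, 0, 1), target=utils.CUDA)  # y has shape (4, 2, 3)
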
def Assign(ref, val, target=utils.CUDA):
    """
    Assign val to ref.

    Args:
        ref: Tensor, which is mutable.
        val: Tensor, which will be assigned to ref.

    Returns:
        fake_output: Tensor, all zeros, with the same shape as ref, needed by ME.
        ref_val: Tensor, ref assigned with val.
        attrs: Dictionary, indicates that ref and ref_val share the same buffer.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    dtype = val.dtype
    utils.ops_dtype_check(dtype, [
        utils.DtypeForDavinci.ALL_FLOAT, utils.DtypeForDavinci.INT8,
        utils.DtypeForDavinci.INT16, utils.DtypeForDavinci.INT32,
        utils.DtypeForDavinci.INT64, utils.DtypeForDavinci.UINT8,
        utils.DtypeForDavinci.UINT16, utils.DtypeForDavinci.UINT32,
        utils.DtypeForDavinci.UINT64
    ])
    shape1 = [x.value for x in ref.shape]
    shape2 = [x.value for x in val.shape]
    if shape1 != shape2:
        raise RuntimeError("Assign requires ref and val to have the same shape!")
    utils.check_shape(shape2)

    ref_val = akg.tvm.compute(shape2, lambda *indice: val(*indice), name="ref_val")
    ref_val, binds_info = TensorUtils.inplace_set(ref, ref_val)
    attrs = {utils.BINDS: binds_info}
    fake_output = akg.tvm.compute(ref.shape, lambda *indice: ref_val(*indice),
                                  name="fake_output")
    return fake_output, ref_val, attrs

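# Usage sketch for Assign (illustrative only). The returned attrs dictionary
# is expected to be passed through to the build step so that ref and ref_val
# share the same buffer:
#
#   ref = akg.tvm.placeholder((4,), name="ref", dtype="float32")
#   val = akg.tvm.placeholder((4,), name="val", dtype="float32")
#   fake_output, ref_val, attrs = Assign(ref, val, target=utils.CUDA)
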
def Abs(in_data, target=utils.CCE):
    """
    Compute the absolute value of a tensor.

    Args:
        in_data (tvm.tensor.Tensor): Tensor of type float16, float32, int8, uint8, int32.

    Returns:
        tvm.tensor.Tensor of same type and shape as in_data.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    utils.check_shape(in_data.shape)
    in_type = in_data.dtype
    if target == utils.CCE:
        utils.ops_dtype_check(in_type, utils.DtypeForDavinci.ALL_TYPES)
        # Integer types are computed in float16 and cast back afterwards.
        need_cast_dtype = ["int8", "int32", "uint8"]
        if in_type in need_cast_dtype:
            in_data = akg.tvm.compute(in_data.shape,
                                      lambda *indice: in_data(*indice).astype("float16"),
                                      name='type_cast')
        output = akg.tvm.compute(in_data.shape,
                                 lambda *index: akg.tvm.abs(in_data(*index)),
                                 name='abs_value')
        if in_type in need_cast_dtype:
            output = akg.tvm.compute(in_data.shape,
                                     lambda *indice: output(*indice).astype(in_type),
                                     name='res')
    else:
        if in_type == 'float16':
            in_data = akg.topi.cast(in_data, 'float32')
        output = akg.topi.abs(in_data)
        if in_type == 'float16':
            output = akg.topi.cast(output, 'float16')
    return output

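# Usage sketch for Abs (illustrative only). On Ascend, integer inputs are
# computed in float16 and cast back, as above:
#
#   x = akg.tvm.placeholder((6, 6), name="x", dtype="int8")
#   y = Abs(x, target=utils.CCE)  # y has dtype int8 again
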
def minimum(input1, input2, target=utils.CCE):
    """
    Return the minimum of two tensors element-wise.

    Note:
        minimum supports broadcasting.

    Args:
        input1: Tensor.
        input2: Tensor. Has the same type as input1.

    Returns:
        Tensor, has the same type as the inputs.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    utils.ops_dtype_check([input1.dtype, input2.dtype], utils.DtypeForDavinci.ALL_TYPES)
    utils.elemwise_dtype_check(input1.dtype, input2.dtype)
    dtype = input1.dtype

    shape1 = [x.value for x in input1.shape]
    shape2 = [x.value for x in input2.shape]
    utils.check_shape(shape1)
    utils.check_shape(shape2)
    utils.auto_broadcast_check(shape1, shape2)

    # On Ascend, int8/uint8 are computed in float16 and cast back afterwards.
    need_cast = target == utils.CCE and dtype in ["int8", "uint8"]
    if need_cast:
        input1 = Cast(input1, "float16", target)
        input2 = Cast(input2, "float16", target)
    res = akg.topi.minimum(input1, input2)
    if need_cast:
        res = Cast(res, dtype, target)
    return res

def tile(data, multiples, target=utils.CCE):
    """
    Repeats the data in the specified dimensions according to the multiples.

    Args:
        data (tvm.tensor.Tensor): Tensor of type float16, float32.
        multiples (Union[list, tuple]): Elements must be int. The number of repetitions.

    Returns:
        tvm.tensor.Tensor, has the same dtype as data.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    shape = [x.value for x in data.shape]
    dtype = data.dtype
    utils.check_shape(shape)
    utils.ops_dtype_check(dtype, utils.DtypeForDavinci.ALL_TYPES)
    utils.check_int_list(multiples, "multiples")
    output = akg.topi.tile(data, multiples)
    return output

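# Usage sketch for tile (illustrative only):
#
#   x = akg.tvm.placeholder((2, 3), name="x", dtype="float32")
#   y = tile(x, (2, 2), target=utils.CUDA)  # y has shape (4, 6)
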
def reduce_and(inputs, axis=None, keepdims=False, target=utils.CUDA):
    """
    Compute the logical and of elements across dimensions of a tensor.

    Args:
        inputs (tvm.tensor.Tensor): Tensor.
        axis (Union[list, tuple, int, None]): The dimensions to reduce.
            An empty list or tuple is treated the same as None (reduce all axes).
        keepdims (bool): If True, the result keeps the same rank as the input.

    Returns:
        tvm.tensor.Tensor, has same type as input. If keepdims is True, all reduced
        dimensions are retained with length 1; otherwise the reduced axes are eliminated.

    Supported Platforms:
        'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    axis = ft_util.refine_reduce_axis(inputs, axis)
    utils.check_shape(inputs.shape)
    output = akg.topi.all(inputs, axis=axis, keepdims=keepdims)
    return output

def logical_and(input1, input2, target=utils.CCE):
    """
    Compute logical_and of input1 and input2.

    Args:
        input1 (tvm.tensor.Tensor): Tensor.
        input2 (tvm.tensor.Tensor): Tensor.

    Returns:
        tvm.tensor.Tensor, logical_and of input1 and input2.

    Supported Platforms:
        'Ascend', 'GPU', 'CPU'
    """
    utils.check_supported_target(target)
    utils.elemwise_dtype_check(input1.dtype, input2.dtype)
    shape1 = [x.value for x in input1.shape]
    shape2 = [x.value for x in input2.shape]
    utils.check_shape(shape1)
    utils.check_shape(shape2)
    res = akg.topi.logical_and(input1, input2)
    return res