def l2loss(data):
    """Computes L2 loss: sum(data * data) / 2."""
    dtype = data.dtype
    check_list = ["float16", "float32"]
    if not (dtype.lower() in check_list):
        raise RuntimeError("l2loss only support %s while dtype is %s" % (",".join(check_list), dtype))
    vc_util.check_shape(data.shape)

    orig_dtype = dtype
    if dtype.lower() == "float16":
        dtype = "float32"
        data = akg.topi.cast(data, dtype)

    # code has bug
    # shape, axis = simplify_axis_shape(shape, range(len(shape)))

    coeff_sqrt = akg.tvm.const(1.0 / (2 ** (0.5)), dtype=dtype)

    res = akg.lang.cce.vmuls(data, coeff_sqrt)
    res = akg.lang.cce.vmul(res, res)
    res, _ = sum.sum_value(res)

    if dtype != orig_dtype:
        res = akg.topi.cast(res, orig_dtype)

    return res

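# Hedged NumPy sketch (not part of the operator above) of what l2loss computes:
# scaling by 1/sqrt(2) before squaring yields sum(x^2) / 2 using only multiplies and a sum.
# The helper name `l2loss_reference` is illustrative only.
import numpy as np

def l2loss_reference(x):
    x = x.astype(np.float32)           # mirror the float16 -> float32 promotion
    scaled = x * (1.0 / np.sqrt(2.0))  # coeff_sqrt in the kernel
    return np.sum(scaled * scaled)     # == np.sum(x * x) / 2
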
def gather(params_shape, indices_shape, params_dtype, indices_dtype, axis, kernel_name, cce_path="./"):
    """Gather data by indices"""
    vc_util.check_shape(params_shape, length=2)
    vc_util.check_shape(indices_shape, length=1)
    vc_util.ops_dtype_check(params_dtype, vc_util.DtypeForDavinci.ALL_TYPES)
    vc_util.ops_dtype_check(indices_dtype, vc_util.DtypeForDavinci.INT32)
    vc_util.check_equal("axis", "zero", axis, 0)

    # construct compute
    o_shape = (indices_shape[0], params_shape[1])
    xx = akg.tvm.placeholder(params_shape, dtype=params_dtype, name="X")
    yy = akg.tvm.placeholder(indices_shape, dtype=indices_dtype, name="Y")
    res = akg.tvm.extern(o_shape, [xx, yy],
                         lambda ins, outs: kernel_ir(outs[0], ins[0], ins[1]),
                         name="res", dtype=params_dtype)
    s = akg.tvm.create_schedule(res.op)

    # create cce
    attrs = {"enable_multicore": False}
    with akg.build_config(add_lower_pass=cce.debug_mode(0), dump_pass_ir=True):
        mod = akg.build(s, [xx, yy, res], "cce", name=kernel_name, attrs=attrs)

    source_code = mod.imported_modules[0].get_source()
    utils.create_code(kernel_name, cce_path, source_code)

    return mod

def dropout_do_mask(data_tensor, data_mask, keep_prob):
    """Apply a dropout mask: scale data_tensor by 1 / keep_prob and zero out elements according to
    data_mask, which holds one mask bit per element, packed 8 bits per byte."""
    dtype = data_tensor.dtype
    shape_tensor = [x.value for x in data_tensor.shape]
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    vc_util.check_shape(shape_tensor)

    strides = [1]
    for x in reversed(shape_tensor):
        strides.append(strides[-1] * x)

    if keep_prob < 0 or keep_prob > 1:
        raise RuntimeError("keep_prob must be in [0, 1]")
    keep_prob_const = akg.tvm.const(1.0 / keep_prob, dtype=dtype)
    data_scale_ub = akg.tvm.compute(shape_tensor,
                                    lambda *indices: data_tensor(*indices) * keep_prob_const,
                                    name='data_scale_ub')

    def get_index(indices):
        # linear offset of the element, divided by 8 to address the byte that holds its mask bit
        idx = 0
        for i in range(len(indices)):
            idx += indices[len(indices) - i - 1] * strides[i]
        return idx // 8

    if dtype == "float32":
        data_scale_ub_16 = akg.topi.cast(data_scale_ub, "float16")
        res_ub_16 = akg.tvm.compute(shape_tensor,
                                    lambda *indice: dav.dropout(data_mask[get_index(indice)],
                                                                data_scale_ub_16(*indice)))
        res = akg.topi.cast(res_ub_16, "float32")
    else:
        res = akg.tvm.compute(shape_tensor,
                              lambda *indice: dav.dropout(data_mask[get_index(indice)],
                                                          data_scale_ub(*indice)))

    return res

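# Hedged NumPy sketch of the dropout math above, assuming the mask has already been unpacked to one
# 0/1 value per element (the kernel itself reads one bit per element, packed 8 per byte, which is
# why get_index divides the linear offset by 8). The helper name is illustrative only.
import numpy as np

def dropout_do_mask_reference(data, mask_bits, keep_prob):
    scaled = data * (1.0 / keep_prob)              # inverted-dropout scaling
    return scaled * mask_bits.astype(data.dtype)   # zero out dropped elements
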
def logical_not(inputs):
    """Compute the elementwise logical NOT of a boolean tensor."""
    vc_util.ops_dtype_check(inputs.dtype, vc_util.DtypeForDavinci.BOOL)
    vc_util.check_shape(inputs.shape)

    res = akg.topi.logical_not(inputs)

    return res

def discontinous_mov(data, out_shape):
    """
    Extract the elements at even indices (the 1st, 3rd, 5th, ... elements) of the original data and
    copy them into a tensor of shape (2, original_length / 2).

    Args:
        data (tvm.tensor.Tensor): Tensor of type float16, float32.
        out_shape (list): a list of output's shape.

    Returns:
        tvm.tensor.Tensor, has the same type as data, but its shape changes to out_shape rather than
        data's shape.

    Example:
        if data = [1,2,3,4,5,6,7,8,9,10] then the output = [[1,3,5,7,9],[1,3,5,7,9]].
    """

    # check types
    vc_util.ops_dtype_check(data.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    shape = [x.value for x in data.shape]
    vc_util.check_shape(shape)

    output = akg.tvm.compute(out_shape, lambda j, i: data[i * 2], name="output")

    return output

def invert_permutation_run(shape, dtype, attrs):
    # check shapes
    vc_util.check_shape(shape)

    if dtype.lower() != "int32":
        raise RuntimeError("indices_dtype only support int32 while dtype is %s" % dtype)

    A = akg.tvm.placeholder(shape, dtype, name="A")
    op = invert_permutation.invert_permutation(A)
    s = akg.tvm.create_schedule(op.op)

    kernel_name = utils.gen_name_kernel("invert_permutation", dtype, shape)
    with akg.build_config(add_lower_pass=cce.debug_mode(0), dump_pass_ir=True):
        mod = akg.build(s, [A, op], "cce", name=kernel_name, attrs=attrs, polyhedral=True)

    input_data = np.random.permutation(np.arange(shape[0])).astype(np.int32)
    expect = np.full([shape[0]], 0, np.int32)
    for i, e in enumerate(input_data):
        expect[e] = i

    output = np.full([shape[0]], 0, np.int32)
    output = utils.mod_launch(mod, (input_data, output), expect=expect)

    return (input_data, ), output, expect, compare_tensor(output, expect, rtol=5e-03, equal_nan=True)

def pow(data1, data2):
    """
    Computes power(data1, data2) elementwise, broadcast is supported.

    Args:
        data1 (tvm.tensor.Tensor): Tensor.
        data2 (tvm.tensor.Tensor): Tensor of same type as data1; if shape(data2) != shape(data1),
            broadcast will happen.

    Returns:
        tvm.tensor.Tensor, powered result, with same type as input tensors and broadcasted shape of
        data1 and data2.
    """
    vc_util.elemwise_dtype_check(data1.dtype, data2.dtype)
    vc_util.check_shape(data1.shape)
    vc_util.check_shape(data2.shape)
    vc_util.auto_broadcast_check(data1.shape, data2.shape)

    in_dtype = data1.dtype
    if in_dtype == 'float16':
        data1 = akg.topi.cast(data1, 'float32')
        data2 = akg.topi.cast(data2, 'float32')

    res = akg.topi.power(data1, data2)

    if in_dtype == 'float16':
        res = akg.topi.cast(res, 'float16')

    return res

def tanh_grad(data_y, data_dy):
    """
    Compute the backpropagation gradient of tanh.

    Args:
        data_y: Tensor, which equals the output of tanh.
        data_dy: Tensor, the initial gradients.

    Return:
        Tensor, overall gradients.
    """
    dtype = data_y.dtype
    vc_util.ops_dtype_check(data_y.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    shape = [x.value for x in data_y.shape]
    vc_util.check_shape(shape)

    # dx = dy * (1 - y*y)
    tmp1 = akg.tvm.const(-1, dtype=dtype)
    tmp2 = akg.tvm.const(1, dtype=dtype)
    data1_square = akg.lang.cce.vmul(data_y, data_y)
    data_tmp = akg.lang.cce.vmuls(data1_square, tmp1)
    one_minus_y_square = akg.lang.cce.vadds(data_tmp, tmp2)
    res = akg.lang.cce.vmul(one_minus_y_square, data_dy)

    return res

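# Hedged NumPy sketch of the gradient formula used above, dx = dy * (1 - y*y), where y = tanh(x).
# The helper name is illustrative only.
import numpy as np

def tanh_grad_reference(y, dy):
    return dy * (1.0 - y * y)
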
def reduce_prod(data, axis=None, keepdims=False):
    """
    Computes the product of elements along a specific axis.

    Args:
        data (tvm.tensor.Tensor): indicating the input tensor.
        axis (Union[list, tuple, int, None]): indicating the dimensions to reduce at. If it's None,
            all dimensions will be reduced.
        keepdims (Union[bool, None]): if true, keep the dimensions with length 1.

    Returns:
        Tensor, the product of elements of input tensor.
    """
    shape = [x.value for x in data.shape]
    ops_dtype_check(data.dtype, [DtypeForDavinci.ALL_FLOAT, DtypeForDavinci.INT8, DtypeForDavinci.UINT8])

    if axis is None and keepdims is False:
        raise ValueError("keepdims must be True when axis is None!")

    axis_new = ft_util.refine_reduce_axis(data, axis)

    check_shape(shape)
    dtype = data.dtype
    if dtype in ["int8", "uint8"]:
        data = akg.topi.cast(data, "float16")

    # prod(x) = exp(sum(log(x)))
    vlog_t = akg_log(data)
    res = akg.topi.sum(vlog_t, axis=axis_new, keepdims=keepdims)
    res = akg_exp(res)

    if dtype in ["int8", "uint8"]:
        res = akg.topi.cast(res, dtype)

    return res

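# Hedged NumPy check of the exp-sum-log identity the kernel relies on: prod(x) == exp(sum(log(x)))
# for strictly positive x. Illustrative only; it does not model the int8/uint8 casting path.
import numpy as np

def reduce_prod_reference(x, axis=None, keepdims=True):
    return np.exp(np.sum(np.log(x), axis=axis, keepdims=keepdims))
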
def reciprocal(data, high_precision=True):
    """
    Computes the reciprocal of data element-wise.

    Args:
        data (tvm.tensor.Tensor): tvm.tensor.Tensor of type float16, float32.
        high_precision (bool): a bool value, whether to use the high-precision version.

    Returns:
        tvm.tensor.Tensor of same type and shape as data.
    """
    vc_util.ops_dtype_check(data.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    shape = [x.value for x in data.shape]
    vc_util.check_shape(shape)

    res = akg.tvm.compute(shape, lambda *indice: akg.tvm.const(1, data.dtype) / (data(*indice)), name="res")

    # When the product is mini, use the Newton iteration method to achieve higher precision.
    if utils.product_is_mini() and high_precision:
        steps = 1
        for _ in range(steps):
            temp1 = data * res
            temp2 = temp1 * akg.tvm.const(-1, data.dtype)
            temp3 = temp2 + akg.tvm.const(2, data.dtype)
            res = temp3 * res

    return res

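# Hedged NumPy sketch of the Newton-Raphson refinement used on mini devices:
# r_{n+1} = r_n * (2 - x * r_n) converges to 1/x when the initial estimate is close enough.
# The helper name is illustrative only.
import numpy as np

def reciprocal_newton_reference(x, steps=1):
    r = 1.0 / x                # initial estimate (the kernel's low-precision divide)
    for _ in range(steps):
        r = r * (2.0 - x * r)  # one Newton iteration, matching temp1..temp3 above
    return r
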
def bias_add_ad(head, input_shape, data_format):
    """
    Compute gradient for bias_add operator using automatic differentiation.

    Args:
        head (tvm.tensor.Tensor): Input tensor.
        input_shape (Union[list, tuple]): Input shape of head.
        data_format (str): Data format of input tensors.

    Returns:
        tvm.tensor.Tensor of same shape and type as head.
    """
    check_list = ["NHWC", "NC1HWC0", "DefaultFormat"]
    if data_format not in check_list:
        raise RuntimeError("bias_add_grad only support %s while dataformat is %s" %
                           (",".join(check_list), data_format))
    vc_util.check_shape(head.shape)
    shape1 = [x.value for x in head.shape]
    vc_util.davinci_format_check(shape1, data_format)
    a = akg.tvm.placeholder(head.shape, head.dtype, "A")

    if data_format == "NC1HWC0":
        bias_shape = (1, head.shape[1], 1, 1, head.shape[4])
        b = akg.tvm.placeholder(bias_shape, head.dtype, "B")
    elif data_format == "NHWC":
        bias_shape = (input_shape[-1],)
        b = akg.tvm.placeholder(bias_shape, head.dtype, "B")
    else:
        bias_shape = (input_shape[1],)
        b = akg.tvm.placeholder(bias_shape, head.dtype, "B")

    c = bias_add.bias_add(a, b, data_format)
    jacs = list(akg.differentiate(c, [b], head))
    attrs = {}
    return jacs[0], attrs

def reduce_any_d(x, axis=None, keepdims=False):
    """
    Reduce a tensor on a certain axis based on max.

    Args:
        x (tvm.tensor.Tensor): The input tensor to reduce. Should be of type int8.
        axis (Union[list, tuple, int, None]): The dimensions to reduce. If None, all dimensions will
            be reduced. Each dim must be in the range [-len(data.shape), len(data.shape) - 1].
        keepdims (Union[bool, None]): If True, retains reduced dimensions with length 1, defaults to False.

    Returns:
        tvm.tensor.Tensor of same type as input tensor x.
    """
    # check type
    vc_util.ops_dtype_check(x.dtype, vc_util.DtypeForDavinci.INT8)
    vc_util.check_shape(x.shape)

    # check axis
    vc_util.reduce_axis_check(x.shape, axis)
    refined_axis = refine_reduce_axis(x, axis)
    if len(set(refined_axis)) == len(x.shape) and not keepdims:
        keepdims = True

    res = _reduce_any_d_compute(x, refined_axis, keepdims)

    if len(set(refined_axis)) == len(x.shape):
        res = topi.reshape(res, (1, ))

    return res

def less(data1, data2):
    """
    Compute the elementwise comparison data1 < data2.

    Args:
        data1 (tvm.tensor.Tensor): Tensor of type float16, float32 and int32.
        data2 (tvm.tensor.Tensor): Tensor of type float16, float32 and int32.

    Returns:
        tvm.tensor.Tensor. If data1 is less than data2, return True, else return False.
    """
    vc_util.check_shape(data1.shape)
    vc_util.check_shape(data2.shape)

    # check types
    vc_util.elemwise_dtype_check(data1.dtype, data2.dtype,
                                 [vc_util.DtypeForDavinci.ALL_FLOAT, vc_util.DtypeForDavinci.INT32])

    # check runtime mode, and change dtype
    if utils.product_is_mini() and data1.dtype != "float16":
        data1 = akg.topi.cast(data1, "float16")
        data2 = akg.topi.cast(data2, "float16")
    if (not utils.product_is_mini()) and data1.dtype == "int32":
        data1 = akg.topi.cast(data1, "float32")
        data2 = akg.topi.cast(data2, "float32")

    res = akg.topi.less(data1, data2)

    return res

def truncate_div(input_x1, input_x2):
    """
    Calculating data's truncate_div, res = floor(x1/x2) if x1/x2 > 0 else ceil(x1/x2).

    Args:
        input_x1 (tvm.tensor.Tensor): Input tensor, supports float16, float32 on mini devices, and
            int32, int8, uint8, float16, float32 on cloud ones.
        input_x2 (tvm.tensor.Tensor): Input tensor, with same dtype as input_x1.

    Returns:
        A tvm.tensor.Tensor as result of truncate_div.
    """
    vc_util.check_shape(get_shape(input_x1))
    vc_util.check_shape(get_shape(input_x2))
    vc_util.elemwise_dtype_check(input_x1.dtype, input_x2.dtype)
    vc_util.ops_dtype_check(
        input_x1.dtype,
        (vc_util.DtypeForDavinci.ALL_FLOAT) if utils.product_is_mini()
        else (vc_util.DtypeForDavinci.ALL_FLOAT, vc_util.DtypeForDavinci.INT32,
              vc_util.DtypeForDavinci.INT8, vc_util.DtypeForDavinci.UINT8))

    return truncate_div_compute(input_x1, input_x2)

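# Hedged NumPy sketch of the truncate_div semantics described in the docstring: the quotient is
# rounded toward zero (floor for positive results, ceil for negative ones), which is what np.trunc
# does. Illustrative only; truncate_div_compute handles dtype details.
import numpy as np

def truncate_div_reference(x1, x2):
    return np.trunc(x1 / x2)
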
def broadcast_to(x, shape):
    """
    Broadcast a tensor to a compatible shape.

    Args:
        x (tvm.tensor.Tensor): Tensor of type float32, float16, int8, uint8, int32.
        shape (list, tuple): The shape of output tensor.

    Returns:
        A tvm.tensor.Tensor with the same type as x.
    """
    # check shape
    vc_util.check_shape(x)
    vc_util.check_shape(shape)

    # check dtype
    dtype = x.dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_TYPES)

    # the vector_dup instruction doesn't support int8 and uint8;
    # this can be simplified by some methods, such as "auto cast"
    x_shape = get_shape(x)
    if len(x_shape) == 1 and x_shape[0] == 1 and dtype in ["int8", "uint8"]:
        x = cast(x, "float16")

    res = topi.broadcast_to(x, shape)
    if res.dtype != dtype:
        res = cast(res, dtype)

    return res

def floordiv(data1, data2):
    """
    Calculate x/y, and always returns an integer which is floored.

    Args:
        data1 (tvm.tensor.Tensor): Tensor of type float16, float32.
        data2 (tvm.tensor.Tensor): Tensor of type float16, float32.

    Returns:
        tvm.tensor.Tensor, has type of int32.
    """
    vc_util.ops_dtype_check([data1.dtype, data2.dtype], vc_util.DtypeForDavinci.ALL_FLOAT)
    shape1 = [x.value for x in data1.shape]
    vc_util.check_shape(shape1)
    shape2 = [x.value for x in data2.shape]
    vc_util.check_shape(shape2)

    if utils.product_is_mini():
        rec = reciprocal(data2, high_precision=True)
        res = data1 * rec
    else:
        res = akg.topi.divide(data1, data2)
    res = akg.lang.cce.floor(res)

    return res

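# Hedged NumPy sketch of floordiv: divide, then floor, then return an integer result as the
# docstring states. On mini devices the kernel replaces the divide with a multiply by
# reciprocal(data2). The helper name is illustrative only.
import numpy as np

def floordiv_reference(x1, x2):
    return np.floor(x1 / x2).astype(np.int32)
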
def xdivy(data_x1, data_x2):
    """
    Calculate data_x1 divided by data_x2.

    .. math::
        y = \\left\\{
        \\begin{aligned}
            0, && if \\quad x1 == 0 \\\\
            \\dfrac{x1}{x2}, && otherwise
        \\end{aligned}
        \\right.

    Args:
        data_x1 (tvm.tensor.Tensor): Tensor of dtype "float16" or "float32".
        data_x2 (tvm.tensor.Tensor): Tensor of dtype "float16" or "float32".

    Returns:
        tvm.tensor.Tensor
    """
    shape_x1 = get_shape(data_x1)
    shape_x2 = get_shape(data_x2)
    vc_util.check_shape(shape_x1)
    vc_util.check_shape(shape_x2)

    vc_util.elemwise_dtype_check(data_x1.dtype, data_x2.dtype)
    dtype = data_x1.dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_FLOAT)

    return xdivy_compute(data_x1, data_x2)

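# Hedged NumPy sketch of the xdivy formula in the docstring: the result is 0 wherever x1 == 0
# (even if x2 == 0), and x1 / x2 elsewhere. Illustrative only; xdivy_compute is the real kernel.
import numpy as np

def xdivy_reference(x1, x2):
    x1 = np.asarray(x1, dtype=np.float32)
    x2 = np.asarray(x2, dtype=np.float32)
    out = np.zeros_like(x1)
    np.divide(x1, x2, out=out, where=(x1 != 0))  # leave 0 where x1 == 0
    return out
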
def clip(data, min_val, max_val):
    """
    Clip the data in range(min_val, max_val).

    Change values less than min_val in data to min_val, and change values greater than max_val to max_val.

    Note:
        min_val should be smaller or equal to max_val.

    Args:
        data: Tensor.
        min_val: Float. When data < min_val, set data to min_val.
        max_val: Float. When data > max_val, set data to max_val.

    Returns:
        Tensor, has the same type and shape as data.
    """
    dtype = data.dtype
    check_list = ["float16", "float32"]
    if not dtype.lower() in check_list:
        raise RuntimeError("clip only support %s while dtype is %s" % (",".join(check_list), dtype))
    shape = data.shape
    vc_util.check_shape(shape)

    res = akg.topi.clip(data, min_val, max_val)

    return res

def reduce_sum(inputs, axis=None, keepdims=False):
    """
    Compute the sum of elements across dimensions of a tensor.

    Args:
        inputs (tvm.tensor.Tensor): Tensor.
        axis (Union[list, tuple, int, None]): If the list or tuple is empty, the axis is treated as None.
        keepdims (bool): If keepdims is True, the result shape has the same length as the input shape.

    Returns:
        tvm.tensor.Tensor, has same type as input. If keepdims is True, all reduced dimensions are
        retained with length 1, else the reduced axes are eliminated.
    """
    axis = ft_util.refine_reduce_axis(inputs, axis)
    vc_util.check_shape(inputs.shape)

    in_dtype = inputs.dtype
    if in_dtype == 'float16':
        inputs = akg.topi.cast(inputs, 'float32')

    output = akg.topi.sum(inputs, axis=axis, keepdims=keepdims)

    if in_dtype == 'float16':
        output = akg.topi.cast(output, 'float16')

    return output

def less_equal(input1, input2):
    """
    Check whether input1 is less than or equal to input2 elementwise.

    Args:
        input1 (tvm.tensor.Tensor): Tensor.
        input2 (tvm.tensor.Tensor): Tensor.

    Returns:
        tvm.tensor.Tensor. If input1 is less than or equal to input2, return True, else return False.
    """
    shape1 = [x.value for x in input1.shape]
    shape2 = [x.value for x in input2.shape]
    vc_util.check_shape(shape1)
    vc_util.check_shape(shape2)

    shape1, shape2, shape = produce_shapes(shape1, shape2)

    vc_util.elemwise_dtype_check(input1.dtype, input2.dtype)
    dtype = input1.dtype

    # get lessequal compute
    t_value = akg.tvm.compute(shape, lambda *indice: akg.tvm.const(1, dtype), "T")
    f_value = akg.tvm.compute(shape, lambda *indice: akg.tvm.const(0, dtype), "F")

    input1_bro = akg.topi.broadcast_to(input1, shape)
    input2_bro = akg.topi.broadcast_to(input2, shape)
    c_out = akg.tvm.compute(shape,
                            lambda *indice: akg.tvm.expr.Select(input1_bro[indice] <= input2_bro[indice],
                                                                t_value[indice], f_value[indice]),
                            name="C")
    res = akg.tvm.compute(shape, lambda *indice: c_out(*indice).astype("bool"), name="res")

    return res

def logsoftmax_grad(Y, dY, axis):
    """
    Computes the back propagation gradients by chain rule.

    Args:
        Y: Tensor, holds the logsoftmax activation output.
        dY: Tensor, holds the initial gradients.
        axis: Integer, on which dimension the softmax is applied.

    Returns:
        Tensor, the overall gradients.
    """
    shape = [x.value for x in Y.shape]
    vc_util.check_shape(shape)
    dtype = Y.dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_FLOAT)

    if axis == -1:
        axis = len(shape) + axis
    if axis >= len(shape):
        raise RuntimeError("axis should be less than dimension")
    if axis < -1:
        raise RuntimeError("negative axis only support -1, please specify the axis in positive value")

    # dX = dY - exp(Y) * sum(dY), where exp(Y) recovers softmax since Y is already log-softmax
    softmax = akg.topi.exp(Y)
    dy_sum = akg.lang.cce.sum(dY, axis=axis)
    dy_sum_broadcast = akg.lang.cce.broadcast(dy_sum, shape)
    mul_result = akg.lang.cce.vmul(softmax, dy_sum_broadcast)
    res = akg.lang.cce.vsub(dY, mul_result)

    attrs = {"pragma_reschedule": 1, "pragma_modshift": 1}
    return res, attrs

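# Hedged NumPy sketch of the chain-rule formula implemented above. With Y = log_softmax(X),
# the gradient is dX = dY - exp(Y) * sum(dY, axis), since exp(Y) recovers softmax(X).
# The helper name is illustrative only.
import numpy as np

def logsoftmax_grad_reference(y, dy, axis):
    return dy - np.exp(y) * np.sum(dy, axis=axis, keepdims=True)
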
def concat(data, axis):
    """
    Concatenates data along the dimension set by axis.

    Args:
        data (Union[list, tuple]): list or tuple of tvm.tensor.Tensor of type float16, float32, int32, int8, uint8.
        axis (int): Specifies the axis along which to concatenate. Must be in the range [-rank(data), rank(data)).

    Returns:
        tvm.tensor.Tensor of same type as data.
    """
    data_size = len(data)
    min_size = 1
    if data_size < min_size:
        raise RuntimeError("The size of data must be greater equal 1")

    dtype = data[0].dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_TYPES)

    shape_0 = data[0].shape
    vc_util.check_shape(shape_0)
    if axis < 0:
        axis += len(shape_0)

    for i in range(1, data_size):
        shape_i = data[i].shape
        vc_util.check_shape(shape_i)
        if len(shape_i) != len(shape_0):
            raise ValueError("Input tensors must have same dimensions.")

    res = akg.lang.cce.concat(data, axis)
    return res

def acos_grad(x, dy):
    """
    Gradient for acos.

    .. math::
        dx = -\\frac{1}{\\sqrt{1 - x^2}} \\cdot dy

    Args:
        x (tvm.tensor.Tensor): tensor of type float16, float32.
        dy (tvm.tensor.Tensor): tensor of type float16, float32.

    Returns:
        tvm.tensor.Tensor, same type and shape as x.
    """
    dtype = x.dtype
    vc_util.ops_dtype_check(x.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    vc_util.ops_dtype_check(dy.dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    vc_util.check_shape(x.shape)
    vc_util.check_shape(dy.shape)

    one = akg.tvm.const(1.0, dtype=dtype)
    mid_square = akg.tvm.compute(x.shape, lambda *i: (one - x(*i) * x(*i)), name="mid_square")
    rsq = rsqrt.rsqrt(mid_square)
    dx = akg.tvm.compute(x.shape, lambda *i: -rsq(*i) * dy(*i), name="dx")

    return dx

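# Hedged NumPy sketch of the acos gradient used above: dx = -dy / sqrt(1 - x^2), written here with
# an explicit reciprocal square root to mirror the rsqrt call. The helper name is illustrative only.
import numpy as np

def acos_grad_reference(x, dy):
    rsq = 1.0 / np.sqrt(1.0 - x * x)  # rsqrt(1 - x^2)
    return -rsq * dy
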
def blas_axby(x, y, alpha, beta):
    r"""
    Blas axby.

    :math:`\alpha x + \beta y`

    Args:
        x (tvm.tensor.Tensor): Input `x` of type float16 or float32.
        y (tvm.tensor.Tensor): Input `y` of type float16 or float32.
        alpha (Union[int, float]): Scale of `x`.
        beta (Union[int, float]): Scale of `y`.

    Returns:
        tvm.tensor.Tensor, has the same shape and type as inputs.
    """
    vc_util.ops_dtype_check([x.dtype, y.dtype], vc_util.DtypeForDavinci.ALL_FLOAT)
    vc_util.check_shape(x.shape)
    vc_util.check_shape(y.shape)

    ax = akg.lang.cce.vmuls(x, alpha)
    by = akg.lang.cce.vmuls(y, beta)
    res = akg.lang.cce.vadd(ax, by)

    return res

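# Hedged NumPy sketch of the axby combination computed above: res = alpha * x + beta * y.
# The helper name is illustrative only.
import numpy as np

def blas_axby_reference(x, y, alpha, beta):
    return alpha * x + beta * y
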
def relu_grad(inputs, head):
    """
    Computes gradient of inputs for the relu op.

    Args:
        inputs: It is the same with the relu op.
        head: Tensor, has the same type and shape as inputs. Back propagation value.

    Returns:
        Tensor, has the same type and shape as inputs.
    """
    check_list = ["float16", "float32"]
    dtype = inputs.dtype
    if not dtype.lower() in check_list:
        raise RuntimeError("relu_grad only support %s while dtype is %s" % (",".join(check_list), dtype))

    shape = [x.value for x in inputs.shape]
    vc_util.check_shape(shape)

    res = akg.tvm.compute(shape,
                          lambda *i: akg.tvm.if_then_else(
                              inputs(*i) > akg.tvm.const(0, dtype),
                              head(*i),
                              akg.tvm.const(0, dtype)
                          ))

    return res

def pad(data, paddings, padtype):
    """
    Add paddings to the tensor.

    :data: The input tensor; currently only two-dimensional tensors are supported.
    :paddings: The paddings, shape [N, 2], where N is the dimension of the tensor. For each
        dimension D of the input, paddings[D, 0] indicates how many values to add before the
        contents of the tensor in that dimension, and paddings[D, 1] indicates how many values
        to add after the contents of the tensor in that dimension.
    :dtype: The type of the input, float16, float32.
    :padtype: One of "CONSTANT", "REFLECT", or "SYMMETRIC"; currently only "constant" is supported.
    """
    # check shape
    vc_util.check_shape(data.shape)
    # check types
    vc_util.ops_dtype_check(data.dtype, vc_util.DtypeForDavinci.ALL_TYPES)
    # check padding types
    ptype_checklist = ['constant']
    if not (padtype in ptype_checklist):
        raise RuntimeError("pad_cce only support %s while padtype is %s" % (",".join(ptype_checklist), padtype))

    dtype = data.dtype
    if dtype == 'int8' or dtype == 'uint8':
        data = cast(data, "float16")

    rank = len(data.shape)
    pad_before = []
    pad_after = []
    for i in range(rank):
        pad_before.append(paddings[i][0])
        pad_after.append(paddings[i][1])
    B = tvm_pad(data, pad_before, pad_after=pad_after, name='B')

    if dtype == 'int8' or dtype == 'uint8':
        B = cast(B, dtype)

    return B

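# Hedged NumPy sketch of how the paddings argument maps to np.pad's pad_width: paddings[D] is
# (before, after) for dimension D, and only constant (zero) padding is supported by the operator
# above. The helper name is illustrative only.
import numpy as np

def pad_reference(data, paddings):
    pad_width = [(int(p[0]), int(p[1])) for p in paddings]
    return np.pad(data, pad_width, mode="constant", constant_values=0)
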
def leaky_relu(data, negative_slop=0):
    """
    leaky_relu op for input tensor (N,C,H,W) or (N,C1,H,W,C0).

    :math:`max(x, negative_slop * x)`

    Args:
        data (tvm.tensor.Tensor): tensor with type float16 or float32.
        negative_slop (float): 0 <= negative_slop < 1.

    Returns:
        tvm.tensor.Tensor.
    """
    dtype = data.dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_FLOAT)
    vc_util.check_shape(data.shape)

    if negative_slop >= 1 or negative_slop < 0:
        raise RuntimeError("leaky_relu only support negative_slop between [0,1)")

    slop_tmp = akg.tvm.const(negative_slop, dtype=dtype)
    tmp = akg.lang.cce.vmuls(data, slop_tmp)
    res = akg.lang.cce.vmax(tmp, data)

    return res

def round_value(input):
    """
    Rounds the values of an akg.tvm.tensor to the nearest integer (halfway cases to even), element-wise.

    Args:
        input: akg.tvm.Tensor of type float16, float32.

    Returns:
        akg.tvm.Tensor of same shape as input, of type int32.

    Raises:
        ValueError: If the type of input is invalid.
    """
    dtype = input.dtype
    vc_util.ops_dtype_check(dtype, vc_util.DtypeForDavinci.ALL_FLOAT)

    shape = input.shape
    vc_util.check_shape(shape)

    if dtype == "float16":
        data_f16 = input
    else:
        data_f16 = akg.tvm.compute(shape, lambda *i: input(*i).astype("float16"), name="data_f16")

    res = akg.lang.cce.round(data_f16)

    return res

def minimum(input1, input2):
    """
    Return the min value of two tensors element-wise.

    Note:
        minimum supports broadcasting.

    Args:
        input1: Tensor.
        input2: Tensor. Has the same type as input1.

    Returns:
        Tensor, has the same type as inputs.
    """
    vc_util.ops_dtype_check([input1.dtype, input2.dtype], vc_util.DtypeForDavinci.ALL_TYPES)
    vc_util.elemwise_dtype_check(input1.dtype, input2.dtype)
    dtype = input1.dtype

    shape1 = [x.value for x in input1.shape]
    shape2 = [x.value for x in input2.shape]
    vc_util.check_shape(shape1)
    vc_util.check_shape(shape2)
    vc_util.auto_broadcast_check(shape1, shape2)

    if dtype in ("int8", "uint8"):
        input1 = cast(input1, "float16")
        input2 = cast(input2, "float16")
    res = akg.topi.minimum(input1, input2)
    if dtype in ("int8", "uint8"):
        res = cast(res, dtype)

    return res

def reverse(input_data, axis):
    """
    Reverse a tensor on some dimension.

    Args:
        input_data (tvm.tensor.Tensor): Tensor of float16, float32 and int32.
        axis (Union[list, tuple, int]): The dimension(s) to reverse. Reversing along the last
            dimension is not supported, so axis cannot be None.

    Returns:
        tvm.tensor.Tensor, has the same type and shape as input_data.
    """
    shape = get_shape(input_data)
    dtype = input_data.dtype

    # check dtype and shape
    vc_util.check_shape(shape)
    vc_util.ops_dtype_check(dtype, [vc_util.DtypeForDavinci.ALL_FLOAT, vc_util.DtypeForDavinci.INT32])

    # check axis
    shape_len = len(shape)
    if hasattr(axis, 'index'):
        axis = list(axis)
    if isinstance(axis, int):
        axis = [axis]
    vc_util.axis_check(shape_len, axis)
    _check_axis(axis, shape)

    # compute res
    res = reverse_compute(input_data, axis)

    return res
