def elu_grad(grads, activations, y, kernel_name="elu_grad"):
    """
    do element-wise elu_grad operation

    Parameters:
    ----------
    grads: the dict of gradient input, only support float16, float32

    activations: the dict of activation input, only support float16, float32

    y : the dict of output

    kernel_name : cce kernel name, default value is "elu_grad"

    Returns
    -------
    None
    """
    shape_gradient = grads.get("shape")
    shape_activation = activations.get("shape")
    dtype_gradient = grads.get("dtype")
    dtype_activation = activations.get("dtype")

    check_shape(shape_gradient, param_name="grads")
    check_shape(shape_activation, param_name="activations")
    if not operator.eq(shape_gradient, shape_activation):
        raise RuntimeError("all input shape must be equal")
    shape_gradient, _ = refine_shape_axes(shape_gradient, [])
    shape_activation, _ = refine_shape_axes(shape_activation, [])

    check_list = ("float16", "float32")
    check_dtype(dtype_gradient, check_list, param_name="grads")
    check_dtype(dtype_activation, check_list, param_name="activations")
    if dtype_gradient.lower() != dtype_activation.lower():
        raise RuntimeError("all input dtype must be same")
    dtype = dtype_gradient.lower()

    data_gradient = tvm.placeholder(shape_gradient, dtype=dtype,
                                    name="data_gradient")
    data_activation = tvm.placeholder(shape_activation, dtype=dtype,
                                      name="data_activation")
    res = elu_grad_compute(data_gradient, data_activation, y, kernel_name)

    with tvm.target.cce():
        auto_sch = topi.generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "print_ir": False,
        "tensor_list": [data_gradient, data_activation, res]
    }
    te.lang.cce.cce_build_code(auto_sch, config)


def acos_grad(y, dy, z, kernel_name="acos_grad"):
    """
    do element-wise acos_grad operation between two input tensors

    Parameters:
    ----------
    y : dict of y, include shape and dtype, dtype support float16, float32

    dy : dict of dy, include shape and dtype, dtype support float16, float32

    z : dict of z, include shape and dtype, dtype support float16, float32

    kernel_name : cce kernel name, default value is "acos_grad"

    Returns
    -------
    None
    """
    # get the shape and dtype for input_1, input_2
    shape_y = y.get("shape")
    shape_dy = dy.get("shape")
    dtype = y.get("dtype")
    dtype1 = dy.get("dtype")

    check_shape(shape_y, param_name="y")
    check_shape(shape_dy, param_name="dy")
    shape_y, _ = refine_shape_axes(shape_y, [])
    shape_dy, _ = refine_shape_axes(shape_dy, [])

    # raise RuntimeError if the input parameters are invalid
    check_list = ("float16", "float32")
    check_dtype(dtype, check_list, param_name="y")
    check_dtype(dtype1, check_list, param_name="dy")
    dtype = dtype.lower()
    dtype1 = dtype1.lower()

    if not operator.eq(shape_y, shape_dy):
        raise RuntimeError(
            "acos_grad only support input shape while input_shape1 equals"
            " to input_shape2")
    if dtype != dtype1:
        raise RuntimeError(
            "acos_grad only support dtype while input_dtype1 equals"
            " to input_dtype2")

    data_y = tvm.placeholder(shape_y, dtype=dtype, name="data1")
    data_dy = tvm.placeholder(shape_dy, dtype=dtype, name="data2")

    res = acos_grad_compute(data_y, data_dy, z, kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": (data_y, data_dy, res)}
    te.lang.cce.cce_build_code(sch, config)


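# A minimal host-side sketch of what acos_grad is expected to compute, for
# reference only. It assumes `y` carries the forward input of acos (call it x)
# and applies the chain rule d/dx acos(x) = -1 / sqrt(1 - x^2). The helper name
# is illustrative and independent of the kernel build above.
def _acos_grad_reference(y, dy):
    import numpy as np
    y = np.asarray(y, dtype=np.float32)
    dy = np.asarray(dy, dtype=np.float32)
    # element-wise backward rule: dz = dy * (-1 / sqrt(1 - y^2))
    return dy * (-1.0 / np.sqrt(1.0 - y * y))

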
def asin_grad(y, dy, z, kernel_name="asin_grad"):
    """
    do element-wise asin_grad operation between two input tensors

    Parameters:
    ----------
    y : dict of y, include shape and dtype, dtype support float16, float32

    dy : dict of dy, include shape and dtype, dtype support float16, float32

    z : dict of output

    kernel_name : cce kernel name, default value is "asin_grad"

    Returns
    -------
    None
    """
    # get the shape and dtype
    shape_y = y.get("shape")
    shape_dy = dy.get("shape")
    dtype_y = y.get("dtype")
    dtype_dy = dy.get("dtype")

    # kernel name check: should be unique

    # check whether the shape is right
    check_shape(shape_y, param_name="y")
    check_shape(shape_dy, param_name="dy")
    if not operator.eq(shape_y, shape_dy):
        raise RuntimeError("all input shape must be the same")
    shape_y, _ = refine_shape_axes(shape_y, [])
    shape_dy, _ = refine_shape_axes(shape_dy, [])

    # check whether dtypes are fp16, fp32 and whether they are the same
    check_list = ("float16", "float32")
    check_dtype(dtype_y, check_list, param_name="y")
    check_dtype(dtype_dy, check_list, param_name="dy")
    dtype_y = dtype_y.lower()
    if dtype_y != dtype_dy.lower():
        raise RuntimeError("all input dtype must be same")

    # get 2 input tensors: data_y, data_dy
    data_y = tvm.placeholder(shape_y, name="data_y", dtype=dtype_y)
    data_dy = tvm.placeholder(shape_y, name="data_dy", dtype=dtype_y)
    res = asin_grad_compute(data_y, data_dy, z, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": [data_y, data_dy, res]}
    te.lang.cce.cce_build_code(sch, config)


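# Reference sketch (not used by the kernel): the usual asin backward rule,
# d/dx asin(x) = 1 / sqrt(1 - x^2), assuming `y` carries the forward input of
# asin. The helper name is illustrative.
def _asin_grad_reference(y, dy):
    import numpy as np
    y = np.asarray(y, dtype=np.float32)
    dy = np.asarray(dy, dtype=np.float32)
    # element-wise backward rule: dz = dy / sqrt(1 - y^2)
    return dy / np.sqrt(1.0 - y * y)

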
def atan_grad(y, dy, z, kernel_name="atan_grad"):
    """
    Gradient calculation for atan(x)

    Parameters:
    ----------
    y : dict of y, include shape and dtype, dtype support float16, float32

    dy : dict of dy, include shape and dtype, dtype support float16, float32

    z : dict of output, include shape and dtype

    kernel_name : cce kernel name, default value is atan_grad

    Algorithm:
    ----------
    forward : y = atan(x)
    backward gradient : de/dx = dy/dx * de/dy = 1/(1+x^2) * grad

    Returns
    ----------
    None
    """
    # get the shape and dtype
    shape = y.get("shape")
    shape_grad = dy.get("shape")
    dtype = y.get("dtype")
    dtype_grad = dy.get("dtype")

    # check whether kernel name is unique

    # check whether the shape is right
    check_shape(shape, param_name="y")
    check_shape(shape_grad, param_name="dy")
    if not operator.eq(shape, shape_grad):
        raise RuntimeError("all input shape must be the same")
    shape, _ = refine_shape_axes(shape, [])

    # check whether dtypes are fp16, fp32 and whether they are the same
    check_list = ("float16", "float32")
    check_dtype(dtype, check_list, param_name="y")
    check_dtype(dtype_grad, check_list, param_name="dy")
    dtype = dtype.lower()
    if dtype != dtype_grad.lower():
        raise RuntimeError("all input dtype must be same")

    # get 2 input placeholders: data_input, grad
    data_input = tvm.placeholder(shape, name="input_data", dtype=dtype)
    grad = tvm.placeholder(shape, name="input_grad", dtype=dtype)

    # compute the backward gradient
    res = atan_grad_compute(data_input, grad, z, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": [data_input, grad, res]}
    te.lang.cce.cce_build_code(sch, config)


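# Host-side sketch of the backward rule stated in the docstring,
# de/dx = 1/(1+x^2) * grad, assuming `y` carries the forward input x.
# The helper is illustrative and independent of the kernel build above.
def _atan_grad_reference(y, dy):
    import numpy as np
    y = np.asarray(y, dtype=np.float32)
    dy = np.asarray(dy, dtype=np.float32)
    return dy / (1.0 + y * y)

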
def data_format_dim_map(x, y, src_format="NHWC", dst_format="NCHW",
                        kernel_name="data_format_dim_map"):
    """
    Returns the dimension index in the destination data format given the one
    in the source data format.

    Parameters
    ----------
    x : A Tensor with each element as a dimension index in the source data
        format. Must be one of the following types: `int32`. Must be in the
        range [-4, 4).
    y : Shape and dtype of y, reserved parameter, not used now.
    src_format : An optional `string`. Defaults to `"NHWC"`. Source data format.
    dst_format : An optional `string`. Defaults to `"NCHW"`. Destination data format.
    kernel_name : CCE kernel name, default value is "data_format_dim_map" (optional).

    Returns
    -------
    None
    """
    shape_input = x.get("shape")
    dtype_input = x.get("dtype")

    # check kernel name, shape, size, dtype
    check_shape(shape_input, param_name="x")
    shape_input, _ = refine_shape_axes(shape_input, [])
    check_list = ("int32",)
    dtype_input = dtype_input.lower()
    check_dtype(dtype_input, check_list, param_name="x")

    # check length of format
    if len(src_format) != 4:
        raise ValueError(
            "source format must be of length 4, received src_format = %s"
            % src_format)
    if len(dst_format) != 4:
        raise ValueError(
            "destination format must be of length 4, received dst_format = %s"
            % dst_format)

    # get data and compute
    data_input = tvm.placeholder(shape_input, dtype=dtype_input,
                                 name="data_input")
    res = _data_format_dim_map_compute(data_input, y, src_format, dst_format,
                                       kernel_name)

    with tvm.target.cce():
        sch = topi.generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "print_ir": False,
        "tensor_list": (data_input, res),
        "bool_storage_as_1bit": False
    }
    te.lang.cce.cce_build_code(sch, config)


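# Illustrative reference of the mapping the operator performs: each element of
# x is a dimension index in src_format (negative indices wrap modulo 4) and is
# mapped to the index of the same axis letter in dst_format. For NHWC -> NCHW
# this sends 0 -> 0 (N), 1 -> 2 (H), 2 -> 3 (W), 3 -> 1 (C). The helper name is
# illustrative only.
def _data_format_dim_map_reference(indices, src_format="NHWC", dst_format="NCHW"):
    import numpy as np
    indices = np.asarray(indices, dtype=np.int32)
    # lookup table: position i holds the destination index of src_format[i]
    table = np.array([dst_format.index(axis) for axis in src_format],
                     dtype=np.int32)
    return table[indices % 4]

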
def tensor_equal(input_x, input_y, output_z, kernel_name="tensor_equal"):
    '''
    True if two tensors have the same size and elements, False otherwise

    :param input_x: dict
                    input tensor x
    :param input_y: dict
                    input tensor y
    :param output_z: dict
                    output tensor, the comparison result
    :param kernel_name: str
                    kernel name, default value is "tensor_equal"
    :return: None
    '''
    shape_x = input_x.get("shape")
    dtype_x = input_x.get("dtype")
    shape_y = input_y.get("shape")
    dtype_y = input_y.get("dtype")

    check_shape(shape_x)
    check_shape(shape_y)

    check_list = ("float16", "float32", "int32", "int8", "uint8")
    check_dtype(dtype_x, check_list)
    check_dtype(dtype_y, check_list)

    shape_x = list(shape_x)
    shape_x, _ = refine_shape_axes(shape_x, [])
    data_input_x = tvm.placeholder(shape_x, name="data_input_x", dtype=dtype_x)
    shape_y, _ = refine_shape_axes(shape_y, [])
    data_input_y = tvm.placeholder(shape_y, name="data_input_y", dtype=dtype_y)

    # use vsub method to compute the equal result
    res = tensor_equal_compute_use_sub(data_input_x, data_input_y,
                                       output_z, kernel_name)

    with tvm.target.cce():
        schedule = generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "tensor_list": [data_input_x, data_input_y, res],
        "bool_storage_as_1bit": False
    }
    te.lang.cce.cce_build_code(schedule, config)


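# Host-side reference for the semantics described above: the result is True
# only when both tensors have identical shape and identical elements.
# np.array_equal is used purely to illustrate the expected behaviour; the
# helper name is illustrative.
def _tensor_equal_reference(x, y):
    import numpy as np
    return np.array_equal(np.asarray(x), np.asarray(y))

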
def assign_sub(var, value, out, kernel_name='assign_sub'):
    """
    Update var by subtracting value from it.

    Parameters:
    ----------
    var : dict
        dict of input_var, include shape and dtype,
        dtype support int8, uint8, int32, float16, float32

    value : dict
        dict of input_value, include shape and dtype,
        dtype support int8, uint8, int32, float16, float32.
        Must have the same shape and dtype as input_var

    out : dict
        dict of out

    kernel_name : str
        cce kernel name, default value is "assign_sub"

    Returns
    -------
    None
    """
    # get the shape and dtype
    shape_var = var.get("shape")
    shape_value = value.get("shape")
    dtype_var = var.get("dtype")
    dtype_value = value.get("dtype")

    # kernel name check: should be unique

    # check whether the shape is right
    check_shape(shape_var, param_name="var")
    check_shape(shape_value, param_name="value")
    if not operator.eq(shape_var, shape_value):
        raise RuntimeError("all input shapes must be equal")

    # check whether dtypes are fp16, fp32, int8, uint8, int32
    # and whether they are the same
    check_list = ("float16", "float32", "int8", "uint8", "int32")
    check_dtype(dtype_var, check_list, param_name="var")
    check_dtype(dtype_value, check_list, param_name="value")
    dtype_var = dtype_var.lower()
    dtype_value = dtype_value.lower()
    if dtype_var != dtype_value:
        raise RuntimeError("all input dtype must be same")

    shape, _ = refine_shape_axes(shape_var, [])
    data_var = tvm.placeholder(shape, dtype=dtype_var, name='data_var')
    data_value = tvm.placeholder(shape, dtype=dtype_value, name='data_value')
    sch, res = _assign_sub_compute(data_var, data_value, out, kernel_name)

    with set_bool_storage_config():
        tvm.build(sch, [data_var, data_value, res], "cce", name=kernel_name)


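# The update itself is a plain element-wise subtraction; a one-line numpy
# sketch of the intended semantics (illustrative only, helper name is not part
# of the operator):
def _assign_sub_reference(var, value):
    import numpy as np
    return np.asarray(var) - np.asarray(value)

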
def asinh(input_x, output_y, kernel_name="asinh"):
    """
    algorithm: asinh(x) = log(x + sqrt(x^2 + 1))

    Parameters
    ----------
    input_x: the dict of input_x, only support float16, float32

    output_y : the dict of output_y

    kernel_name : cce kernel name, default value is "asinh"

    Returns
    -------
    None
    """
    shape_input = input_x.get("shape")
    dtype_input = input_x.get("dtype")

    check_shape(shape_input, param_name="input_x")
    shape_input, _ = refine_shape_axes(shape_input, [])

    check_list = ("float16", "float32")
    check_dtype(dtype_input, check_list, param_name="input_x")

    inp_dtype = dtype_input.lower()
    shape_input = (functool_reduce(lambda x, y: x * y, shape_input),)
    data_input = tvm.placeholder(shape_input, dtype=inp_dtype,
                                 name="data_input")

    with tvm.target.cce():
        if tbe_platform.cce_conf.api_check_support("te.lang.cce.vlog", "float32") \
                or not tbe_platform.cce_conf.api_check_support("te.lang.cce.vrec",
                                                               "float32"):
            res = asinh_compute_cloud(data_input, output_y, kernel_name)
        else:
            res = asinh_compute_mini(data_input, output_y, kernel_name)
        sch = generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "tensor_list": [data_input, res],
        "bool_storage_as_1bit": False
    }
    te.lang.cce.cce_build_code(sch, config)


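# Numpy sketch of the formula in the docstring, asinh(x) = log(x + sqrt(x^2 + 1)),
# useful as a host-side reference; the helper name is illustrative.
def _asinh_reference(x):
    import numpy as np
    x = np.asarray(x, dtype=np.float32)
    return np.log(x + np.sqrt(x * x + 1.0))

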
def asin(x, y, kernel_name="asin"):
    """
    ----------
    asin(x) = | arcsin(sqrt(1 - x^2)) - HALF_PI,  x belongs to (-1, -2^(-0.5))
              | the 15th order taylor expansion,  x belongs to (-2^(-0.5), 2^(-0.5))
              | HALF_PI - arcsin(sqrt(1 - x^2)),  x belongs to (2^(-0.5), 1)

    Parameters:
    ----------
    x : the placeholder of data input

    y : the dict of output

    kernel_name : cce kernel name, default value is "asin"

    Returns
    -------
    None
    """
    shape_input = x.get("shape")
    dtype_input = x.get("dtype")

    check_shape(shape_input, param_name="x")
    shape_input, _ = refine_shape_axes(shape_input, [])

    check_list = ("float16", "float32")
    check_dtype(dtype_input, check_list, param_name="x")

    inp_dtype = dtype_input.lower()
    data_input = tvm.placeholder(shape_input, dtype=inp_dtype,
                                 name="data_input")

    res = asin_compute(data_input, y, kernel_name)
    with tvm.target.cce():
        auto_sch = topi.generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "print_ir": False,
        "tensor_list": [data_input, res],
        "bool_storage_as_1bit": False
    }
    te.lang.cce.cce_build_code(auto_sch, config)


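# The piecewise form in the docstring is equivalent to arcsin on (-1, 1); the
# small host-side check below illustrates that identity. The middle branch uses
# np.arcsin directly in place of the kernel's 15th order Taylor expansion, and
# the helper name is illustrative only.
def _asin_reference_check(x):
    import numpy as np
    x = np.asarray(x, dtype=np.float64)
    half_pi = np.pi / 2.0
    piecewise = np.where(
        x < -2 ** -0.5, np.arcsin(np.sqrt(1.0 - x * x)) - half_pi,
        np.where(x > 2 ** -0.5, half_pi - np.arcsin(np.sqrt(1.0 - x * x)),
                 np.arcsin(x)))
    return np.allclose(piecewise, np.arcsin(x))

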
def atanh(x, y, kernel_name="atanh"):
    """
    Algorithm: atanh(x) = 0.5 * log((1 + x) / (1 - x))

    Parameters
    ----------
    x: the dict of input data, only support float16, float32.

    y: the dict of output

    kernel_name: cce kernel name, default value is "atanh".

    Returns
    -------
    None
    """
    shape = x.get("shape")
    dtype = x.get("dtype")

    check_shape(shape, param_name="x")
    shape, _ = refine_shape_axes(shape, [])

    check_list = ("float16", "float32")
    check_dtype(dtype, check_list, param_name="x")

    dtype = dtype.lower()
    input_data = tvm.placeholder(shape, dtype, "input_data")

    with tvm.target.cce():
        res = atanh_compute(input_data, y, kernel_name)
        sch = generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "tensor_list": [input_data, res],
        "print_ir": False,
        "bool_storage_as_1bit": False
    }
    te.lang.cce.cce_build_code(sch, config)


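# Host-side sketch of the standard identity atanh(x) = 0.5 * log((1 + x) / (1 - x)),
# valid for |x| < 1; the helper name is illustrative and not used by the kernel.
def _atanh_reference(x):
    import numpy as np
    x = np.asarray(x, dtype=np.float32)
    return 0.5 * np.log((1.0 + x) / (1.0 - x))

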
def atan(x, y, kernel_name="atan"):
    """
    Algorithm: atan
    ----------------------------------
    Parameters:

    x: the dict of input data, only support float16, float32.

    y: the dict of output

    kernel_name: cce kernel name, default value is "atan".
    ----------------------------------
    Returns:

    None
    """
    shape = x.get("shape")
    dtype = x.get("dtype")

    check_shape(shape, param_name="x")
    shape, _ = refine_shape_axes(shape, [])

    check_list = ("float16", "float32")
    check_dtype(dtype, check_list, param_name="x")

    with tvm.target.cce():
        dtype = dtype.lower()
        input_data = tvm.placeholder(shape, dtype=dtype, name="input_data")
        res = atan_compute(input_data, y, kernel_name)
        res = te.lang.cce.cast_to(res, dtype)
        auto_sch = topi.generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "print_ir": False,
        "tensor_list": (input_data, res)
    }
    te.lang.cce.cce_build_code(auto_sch, config)


def acosh(input_data, output_res, kernel_name="acosh"):
    """
    calculating data's acosh, y = log(x + sqrt(x^2 - 1))

    Parameters
    ----------
    input_data: the dict of input, only support float16, float32

    output_res : the dict of output

    kernel_name : cce kernel name, default value is "acosh"

    Returns
    -------
    None
    """
    shape_input = input_data.get("shape")
    dtype_input = input_data.get("dtype")

    check_shape(shape_input, param_name="input_data")

    check_list = ("float16", "float32")
    check_dtype(dtype_input, check_list, param_name="input_data")
    shape_input, _ = refine_shape_axes(shape_input, [])

    input_dtype = dtype_input.lower()
    data = tvm.placeholder(shape_input, dtype=input_dtype, name="data_input")

    res = acosh_compute(data, output_res, kernel_name)

    with tvm.target.cce():
        sch = topi.generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "print_ir": False,
        "tensor_list": (data, res),
        "bool_storage_as_1bit": False
    }
    te.lang.cce.cce_build_code(sch, config)


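# Numpy sketch of the docstring formula acosh(x) = log(x + sqrt(x^2 - 1)),
# defined for x >= 1; illustrative host-side reference only.
def _acosh_reference(x):
    import numpy as np
    x = np.asarray(x, dtype=np.float32)
    return np.log(x + np.sqrt(x * x - 1.0))

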
def bessel_i1e(x, y, kernel_name="bessel_i1e"):
    """
    Algorithm: calculating data's bessel_i1e

    Parameters
    ----------
    x: the dict of input, only support float16, float32

    y : the dict of output

    kernel_name : cce kernel name, default value is "bessel_i1e"

    Returns
    -------
    None
    """
    shape_input = x.get("shape")
    dtype_input = x.get("dtype")

    check_shape(shape_input, param_name="x")
    shape_input, _ = refine_shape_axes(shape_input, [])

    check_list = ("float16", "float32")
    check_dtype(dtype_input, check_list, param_name="x")

    input_dtype = dtype_input.lower()
    data = tvm.placeholder(shape_input, dtype=input_dtype, name="data_input")

    res = bessel_i1e_compute(data, y, kernel_name)

    with tvm.target.cce():
        sch = topi.generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "print_ir": False,
        "tensor_list": (data, res),
        "bool_storage_as_1bit": False
    }
    te.lang.cce.cce_build_code(sch, config)


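# bessel_i1e conventionally denotes the exponentially scaled modified Bessel
# function of the first kind, order 1, i.e. i1e(x) = exp(-|x|) * I1(x).
# Assuming this operator follows that convention, scipy offers a host-side
# reference (illustrative only; scipy is not required by the kernel build):
def _bessel_i1e_reference(x):
    import numpy as np
    from scipy.special import i1e
    return i1e(np.asarray(x, dtype=np.float64))

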
def mse_loss(predict, label, y, reduction='mean', kernel_name="mse_loss"):
    '''
    calculating data
    sum = (predict_n - label_n)^2
    if reduction == 'sum':  res = sum, output a scalar
    if reduction == 'mean': res = sum / total_number_of_tensor, output a scalar
    if reduction == 'none': res = (predict_n - label_n)^2, output a tensor

    :param predict: dict
                    shape and dtype of tensor predict
    :param label: dict
                    shape and dtype of tensor real label,
                    should be same shape and dtype as predict
    :param y: dict
                    shape and dtype of output, loss result after compute
    :param reduction: str
                    Specifies the reduction to apply to the output:
                    'none' | 'mean' | 'sum'. Default: 'mean'
                    'none': no reduction will be applied;
                    'mean': the sum of the output will be divided by the
                            number of elements in the output;
                    'sum': the output will be summed.
                    Note: size_average and reduce are in the process of being
                    deprecated, and in the meantime, specifying either of those
                    two args will override reduction.
    :param kernel_name: str
                    kernel name, default value is "mse_loss"
    :return: None
    '''
    predict_shape = predict.get("shape")
    predict_dtype = predict.get("dtype")
    predict_dtype_lower = predict_dtype.lower()

    label_shape = label.get("shape")
    label_dtype = label.get("dtype")
    label_dtype_lower = label_dtype.lower()

    # check dtype
    dtype_list = ("float16", "float32")
    op_utils.check_dtype(predict_dtype, dtype_list)
    op_utils.check_dtype(label_dtype, dtype_list)

    # check shape
    op_utils.check_shape(predict_shape)
    op_utils.check_shape(label_shape)

    # check kernel_name
    util.check_kernel_name(kernel_name)

    predict_size, _ = op_utils.refine_shape_axes(predict_shape, [])
    data_predict = tvm.placeholder(predict_size, dtype=predict_dtype_lower,
                                   name="data_predict")

    label_size, _ = op_utils.refine_shape_axes(label_shape, [])
    data_label = tvm.placeholder(label_size, dtype=label_dtype_lower,
                                 name="data_label")

    if predict_size != label_size:
        raise RuntimeError("predict tensor size doesn't match label tensor size")

    if reduction not in ("mean", "sum", "none"):
        raise RuntimeError("reduction type should be in mean/sum/none")

    res = mse_loss_compute(data_predict, data_label, reduction, kernel_name)

    with tvm.target.cce():
        schedule = generic.auto_schedule(res)

    config = {"print_ir": False,
              "name": kernel_name,
              "tensor_list": [data_predict, data_label, res]}
    te.lang.cce.cce_build_code(schedule, config)


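# Host-side sketch of the three reduction modes described in the docstring;
# the helper name is illustrative and independent of the kernel build above.
def _mse_loss_reference(predict, label, reduction="mean"):
    import numpy as np
    squared = (np.asarray(predict, dtype=np.float32) -
               np.asarray(label, dtype=np.float32)) ** 2
    if reduction == "none":
        return squared            # element-wise tensor
    if reduction == "sum":
        return squared.sum()      # scalar
    return squared.mean()         # 'mean': sum divided by element count

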
def approximate_equal(input_x, input_y, output_z, tolerance=1e-5,
                      kernel_name="approximate_equal"):
    """
    abs(x - y) <= tolerance

    Parameters
    ----------
    input_x : dict, include shape and dtype, support fp16 and fp32
        shape of tensors, assume src_shape equals dst_shape

    input_y : dict, include shape and dtype, support fp16 and fp32
        shape of tensors, assume src_shape equals dst_shape

    output_z : dict, include shape and dtype, reserve

    tolerance: default 1e-5

    kernel_name : str
        cce kernel name, default value is "approximate_equal"

    Returns
    -------
    None
    """
    shape_x = input_x.get("shape")
    shape_y = input_y.get("shape")
    in_dtype = input_x.get("dtype")
    in_y_dtype = input_y.get("dtype")

    if tolerance < 0:
        raise RuntimeError("tolerance should be >= 0")

    # check shape
    if not operator.eq(shape_x, shape_y):
        raise RuntimeError("all input shapes must be the same")
    check_shape(shape_x, param_name="input_x")
    shape_x, _ = refine_shape_axes(shape_x, [])
    shape_y, _ = refine_shape_axes(shape_y, [])

    # check input tensor data type
    check_list = ("float16", "float32")
    check_dtype(in_dtype, check_list, param_name="input_x")
    check_dtype(in_y_dtype, check_list, param_name="input_y")
    in_dtype = input_x.get("dtype").lower()
    in_y_dtype = input_y.get("dtype").lower()
    if not operator.eq(in_dtype, in_y_dtype):
        raise RuntimeError("all input dtypes must be the same")

    in_data_x = tvm.placeholder(shape_x, name="shape_x", dtype=in_dtype)
    in_data_y = tvm.placeholder(shape_y, name="shape_y", dtype=in_dtype)
    res = approximate_equal_compute(in_data_x, in_data_y, output_z,
                                    tolerance, kernel_name)

    with tvm.target.cce():
        auto_sch = topi.generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "tensor_list": [in_data_x, in_data_y, res],
        "bool_storage_as_1bit": False
    }
    te.lang.cce.cce_build_code(auto_sch, config)


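# Element-wise reference of the predicate in the docstring,
# abs(x - y) <= tolerance; illustrative only, helper name not part of the operator.
def _approximate_equal_reference(x, y, tolerance=1e-5):
    import numpy as np
    return np.abs(np.asarray(x, dtype=np.float32) -
                  np.asarray(y, dtype=np.float32)) <= tolerance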