def check_param(self):
    """
    Check parameters.

    Parameters
    ----------
    None

    Returns
    -------
    None
    """
    op_utils.check_shape(self.input_x_shape, param_name="input_x")
    op_utils.check_shape(self.input_y_shape, param_name="input_y")
    op_utils.check_dtype(self.input_x_dtype, ("float32",),
                         param_name="input_x")
    op_utils.check_dtype(self.input_y_dtype, ("float32",),
                         param_name="input_y")

    add_support = tbe_platform.cce_conf.api_check_support(
        "tik.vadd", "float32")
    if self.input_x_dtype != self.input_y_dtype:
        raise RuntimeError(
            "input_x and input_y do not have the same dtype")
    if self.input_x_dtype == "float32" and not add_support:
        raise RuntimeError(
            "Input dtype is float32, but it is not supported on this platform")


def check_supported(x,
                    segment_ids,
                    y,
                    num_segments,
                    kernel_name="unsorted_segment_max_d"):
    """
    fusion pass test: whether the op is supported when num_segments is int32
    """
    shape = x.get("shape")
    dtype = x.get("dtype").lower()
    segment_ids_shape = segment_ids.get("shape")
    segment_ids_dtype = segment_ids.get("dtype").lower()

    check_list = ("float16", "float32", "int32", "int16")
    op_utils.check_dtype(dtype, check_list, param_name="x")
    op_utils.check_shape(shape, param_name="x")
    check_list_ids = ("int32",)
    op_utils.check_dtype(segment_ids_dtype,
                         check_list_ids,
                         param_name="segment_ids")

    if num_segments <= 0:
        return False

    first_shape = int(shape[0])
    ids_length = int(segment_ids_shape[0])
    if first_shape != ids_length:
        return False

    total_ub_size = (num_segments + first_shape) * BLOCK_LENGTH + \
        ((BLOCK_LENGTH // 2 - first_shape % (BLOCK_LENGTH // 4)) +
         first_shape) * (BLOCK_LENGTH // 8)
    if total_ub_size > UB_SIZE_MAX // 2:
        return False
    return True


def _check_parameter(input_x, input_target):
    """
    Parameters
    ----------
    input_x : dict
        shape and dtype of input_x
    input_target : dict
        shape and dtype of input_target. Shape and dtype must be the same
        as input_x

    Returns
    -------
    None
    """
    shape_x = input_x.get("shape")
    shape_target = input_target.get("shape")
    op_utils.check_shape(shape_x, param_name="input_x")
    if list(shape_x) != list(shape_target):
        raise RuntimeError("input_x and input_target must "
                           "have the same shape.")

    # check input tensor data_type
    dtype_x = input_x.get("dtype").lower()
    dtype_target = input_target.get("dtype").lower()
    check_list = ("float16", "float32")
    op_utils.check_dtype(dtype_x, check_list, param_name="input_x")
    if dtype_x != dtype_target:
        raise RuntimeError("input_x and input_target must "
                           "have the same dtype.")
    if dtype_x == "float32" and not tbe_platform.cce_conf.api_check_support(
            "te.lang.cce.vmul", "float32"):
        raise RuntimeError(
            "The intrinsic only supports float16, but the input dtype is float32")


def _check_params(x, y, scale, offset, sqrt_mode, round_mode, kernel_name):
    """
    check the parameters including shape, dtype, kernel_name, attr.
    """
    shape = x.get("shape")
    x_format = x.get("format")
    dtype = x.get("dtype").lower()

    format_list = ["NC1HWC0", "FRACTAL_NZ"]
    if x_format not in format_list:
        raise RuntimeError("ascend quant only supports [NC1HWC0, FRACTAL_NZ]")
    if x_format == "NC1HWC0":
        if len(shape) != 5:
            raise RuntimeError(
                "ascend quant only supports a shape of length 5 for NC1HWC0")
    if x_format == "FRACTAL_NZ":
        if len(shape) < 4:
            raise RuntimeError(
                "ascend quant only supports a shape of length >= 4 for FRACTAL_NZ")
    check_shape(shape, param_name="x")

    if is_lhisi_version():
        # es
        check_list = ["float16"]
    else:
        check_list = ["float16", "float32"]
    if dtype not in check_list:
        raise RuntimeError("ascend quant only supports %s" %
                           (",".join(check_list)))

    round_mode_list = ["Round", "Ceil", "Floor", "Trunc"]
    if round_mode not in round_mode_list:
        raise RuntimeError("ascend quant only supports round_mode in [%s]" %
                           (",".join(round_mode_list)))


def _check_para_and_getplaceholder(scalar_input, tensor_input, input_dict):
    check_list = ("float32",)
    var_shape = input_dict["var"].get("shape")
    var_dtype = input_dict["var"].get("dtype")
    list_placeholder = []
    for key, value in input_dict.items():
        shape = util.scalar2tensor_one(value.get("shape"))
        op_utils.check_shape(shape)
        if value in scalar_input:
            if not util.is_scalar(shape):
                raise RuntimeError("The shape of %s must be scalar" % key)
        if value in tensor_input:
            if shape != var_shape:
                raise RuntimeError(
                    "The shape of %s must be the same as that of var" % key)

        dtype = value.get("dtype").lower()
        op_utils.check_dtype(dtype, check_list, param_name="var")
        if dtype != var_dtype:
            raise RuntimeError(
                "The dtype of %s must be the same as that of var" % key)

        shape_refine = (functools_reduce(operator.mul, shape),)
        list_placeholder.append(
            tvm.placeholder(shape=shape_refine, name=key, dtype=dtype))
    return list_placeholder


def log(input_x, output_y, base=-1.0, scale=1.0, shift=0.0, kernel_name="log"):
    """
    calculating data

    Parameters
    ----------
    input_x : dict
        shape and dtype of input
    output_y : dict
        shape and dtype of output, should be the same shape and type as input
    base : float
        base of the logarithm, must be strictly positive or -1.0,
        default value is -1.0
    scale : float
        optional attribute, default value is 1.0
    shift : float
        optional attribute, default value is 0.0
    kernel_name : str
        kernel name, default value is "log"

    Returns
    -------
    None
    """
    shape = input_x.get("shape")
    dtype = input_x.get("dtype")
    input_dtype = dtype.lower()

    # input_x's shape check
    op_utils.check_shape(shape, param_name="input_x")

    # input_x's dtype check, only supports fp16 and fp32
    check_list = ("float16", "float32")
    op_utils.check_dtype(input_dtype, check_list, param_name="input_x")

    if base <= 0 and (not isclose(base, -1.0)):
        error_info = {}
        error_info['errCode'] = 'E80000'
        error_info['param_name'] = 'base'
        error_info['op_name'] = 'log'
        error_info['expect_value'] = "strictly positive or -1"
        error_info['real_value'] = base
        raise RuntimeError(
            "In op[%s], the parameter[%s] should be [%s], but actually is [%s]."
            % (error_info['op_name'], error_info['param_name'],
               error_info['expect_value'], error_info['real_value']))

    fused_shape = [reduceIns(lambda x, y: x * y, shape[:])]
    data_input = tvm.placeholder(fused_shape,
                                 name="data_input",
                                 dtype=input_dtype)

    res = log_compute(data_input, output_y, base, scale, shift, kernel_name)

    # auto schedule
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    # operator build
    config = {
        "name": kernel_name,
        "need_build": True,
        "tensor_list": (data_input, res)
    }
    te.lang.cce.cce_build_code(sch, config)


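# A minimal NumPy sketch (illustrative only, not the TBE compute above) of what
# the log kernel is assumed to calculate: log_base(scale * x + shift), where
# base == -1.0 selects the natural logarithm. The helper name `log_reference`
# and the exact role of scale/shift are assumptions drawn from the parameter
# check above, not from the log_compute implementation.
import numpy as np

def log_reference(x, base=-1.0, scale=1.0, shift=0.0):
    y = np.log(scale * x + shift)      # natural log of the scaled/shifted input
    if base > 0:
        y = y / np.log(base)           # change of base: log_b(v) = ln(v) / ln(b)
    return y

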
def atan_grad(y, dy, z, kernel_name="atan_grad"):
    """
    Gradient calculation for atan(x)

    Parameters:
    ----------
    y : dict of y, include shape and dtype, dtype support float16, float32
    dy : dict of dy, include shape and dtype, dtype support float16, float32
    z : dict of output, include shape and dtype
    kernel_name : cce kernel name, default value is atan_grad

    Algorithm :
    ----------
    forward :
        y = atan(x)
    backward gradient :
        de/dx = dy/dx * de/dy = 1 / (1 + x^2) * grad

    Returns
    ----------
    None
    """
    # get the shape and dtype
    shape = y.get("shape")
    shape_grad = dy.get("shape")
    dtype = y.get("dtype")
    dtype_grad = dy.get("dtype")

    # check whether the shape is right
    check_shape(shape, param_name="y")
    check_shape(shape_grad, param_name="dy")
    if not operator.eq(shape, shape_grad):
        raise RuntimeError("all input shape must be the same")
    shape, _ = refine_shape_axes(shape, [])

    # check whether dtypes are fp16, fp32 and whether they are the same
    check_list = ("float16", "float32")
    check_dtype(dtype, check_list, param_name="y")
    check_dtype(dtype_grad, check_list, param_name="dy")
    dtype = dtype.lower()
    if dtype != dtype_grad.lower():
        raise RuntimeError("all input dtype must be same")

    # get 2 input placeholders: data_input, grad
    data_input = tvm.placeholder(shape, name="input_data", dtype=dtype)
    grad = tvm.placeholder(shape, name="input_grad", dtype=dtype)

    # compute the backward gradient
    res = atan_grad_compute(data_input, grad, z, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": [data_input, grad, res]}
    te.lang.cce.cce_build_code(sch, config)


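# A minimal NumPy reference (illustrative only, not the TBE kernel) for the
# gradient formula documented above: the backward pass multiplies the incoming
# gradient by 1 / (1 + x^2), applied element-wise to the first input.
# `atan_grad_reference` is a hypothetical helper name used only for this sketch.
import numpy as np

def atan_grad_reference(x, grad):
    return grad / (1.0 + np.square(x))

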
def logical_or(x1, x2, y, kernel_name="logical_or"):
    """
    algorithm : logical_or
        calculating the value of x1 OR x2 element-wise

    Parameters
    ----------
    x1 : the dict of x1, include shape and dtype, dtype support int8,
        the value only support 0, 1
    x2 : the dict of x2, include shape and dtype, dtype support int8,
        the value only support 0, 1
    y : the dict of y, include shape and dtype
    kernel_name : string, cce kernel name, default value is "logical_or"

    Returns
    -------
    None
    """
    shape_x1 = x1.get("shape")
    shape_x2 = x2.get("shape")
    dtype_x1 = x1.get("dtype")
    dtype_x2 = x2.get("dtype")
    if dtype_x1 == "bool" or dtype_x2 == "bool":
        dtype_x1 = "int8"
        dtype_x2 = "int8"

    check_shape(shape_x1, param_name="x1")
    check_shape(shape_x2, param_name="x2")

    check_tuple = ("int8",)
    check_dtype(dtype_x1, check_tuple, param_name="x1")
    check_dtype(dtype_x2, check_tuple, param_name="x2")

    shape_x1, shape_x2, shape_max = broadcast_shapes(shape_x1,
                                                     shape_x2,
                                                     param_name_input1="x1",
                                                     param_name_input2="x2")
    dtype = dtype_x1.lower()
    data_x1 = tvm.placeholder(shape_x1, name="data_x1", dtype=dtype)
    data_x2 = tvm.placeholder(shape_x2, name="data_x2", dtype=dtype)

    res = logical_or_compute(data_x1, data_x2, y, kernel_name)

    with tvm.target.cce():
        schedule = generic.auto_schedule(res)

    config = {
        "print_ir": False,
        "need_build": False,
        "name": kernel_name,
        "tensor_list": (data_x1, data_x2, res)
    }
    te.lang.cce.cce_build_code(schedule, config)


def mul(x, y, output, kernel_name="mul"):
    """
    do element-wise mul operation between two input tensors

    Parameters:
    ----------
    x : dict. shape, dtype of input x
    y : dict. shape, dtype of input y
    output : dict. shape, dtype of output
    kernel_name : str. cce kernel name, default value is "mul"

    Returns
    -------
    None
    """
    # format_pattern = 1  Nz and vector
    # format_pattern = 2  vector and Nz
    # format_pattern = 0  Nz scalar / Nz Nz / ND ND
    format_pattern = _mul_check_format(x, y)
    shape_x, shape_y = _infer_shape(format_pattern, x, y)

    shape_x = util.scalar2tensor_one(shape_x)
    dtype_x = x.get("dtype").lower()
    shape_y = util.scalar2tensor_one(shape_y)
    dtype_y = y.get("dtype").lower()

    op_utils.check_shape(shape_x, param_name="x")
    op_utils.check_shape(shape_y, param_name="y")

    if dtype_x != dtype_y:
        raise RuntimeError("dtype of inputs should be consistent")
    dtype = dtype_x
    check_list = ("int32", "float16", "float32", "int16")
    op_utils.check_dtype(dtype, check_list, param_name="x")

    vmul_support = tbe_platform.cce_conf.api_check_support(
        "te.lang.cce.vmul", "float32")
    if dtype_x == "float32" and not vmul_support:
        raise RuntimeError(
            "Input dtype is float32, but it is not supported on this platform")

    shape_x, shape_y, shape_max = op_utils.broadcast_shapes(
        shape_x, shape_y, param_name_input1="x", param_name_input2="y")

    shape_x, shape_y = op_utils.refine_shapes_for_broadcast(shape_x, shape_y)
    input_x = tvm.placeholder(shape_x, dtype=dtype, name="x")
    input_y = tvm.placeholder(shape_y, dtype=dtype, name="y")

    res = _mul_compute(input_x, input_y, output, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": (input_x, input_y, res)}
    te.lang.cce.cce_build_code(sch, config)


def data_format_dim_map(x,
                        y,
                        src_format="NHWC",
                        dst_format="NCHW",
                        kernel_name="data_format_dim_map"):
    """
    Returns the dimension index in the destination data format given the one
    in the source data format.

    Parameters
    ----------
    x : A Tensor with each element as a dimension index in the source data
        format. Must be of type `int32`. Must be in the range [-4, 4).
    y : Shape and dtype of y, reserved parameter, not used now.
    src_format : An optional `string`. Defaults to `"NHWC"`. The source data
        format.
    dst_format : An optional `string`. Defaults to `"NCHW"`. The destination
        data format.
    kernel_name : CCE kernel name, default value is "data_format_dim_map"
        (optional).

    Returns
    -------
    None
    """
    shape_input = x.get("shape")
    dtype_input = x.get("dtype")

    # check kernel name, shape, size, dtype
    check_shape(shape_input, param_name="x")
    shape_input, _ = refine_shape_axes(shape_input, [])
    check_list = ("int32",)
    dtype_input = dtype_input.lower()
    check_dtype(dtype_input, check_list, param_name="x")

    # check the length of the formats
    if len(src_format) != 4:
        raise ValueError(
            "source format must be of length 4, received src_format = %s" %
            src_format)
    if len(dst_format) != 4:
        raise ValueError(
            "destination format must be of length 4, received dst_format = %s"
            % dst_format)

    # get data and compute
    data_input = tvm.placeholder(shape_input,
                                 dtype=dtype_input,
                                 name="data_input")
    res = _data_format_dim_map_compute(data_input, y, src_format, dst_format,
                                       kernel_name)

    with tvm.target.cce():
        sch = topi.generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "print_ir": False,
        "tensor_list": (data_input, res),
        "bool_storage_as_1bit": False
    }
    te.lang.cce.cce_build_code(sch, config)


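# A minimal pure-Python sketch (illustrative only, not the TBE compute) of the
# index mapping described above: each dimension index into src_format is mapped
# to the position of the same axis letter in dst_format. Negative indices are
# taken modulo the rank, matching the documented range [-4, 4).
# `dim_map_reference` is a hypothetical helper used only for this example.
def dim_map_reference(indices, src_format="NHWC", dst_format="NCHW"):
    return [dst_format.index(src_format[i % len(src_format)]) for i in indices]

# Example: the "C" axis is index 3 in NHWC and index 1 in NCHW, so
# dim_map_reference([0, 1, 2, 3]) -> [0, 2, 3, 1].

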
def assign_sub(var, value, out, kernel_name='assign_sub'):
    """
    Update var by subtracting value from it.

    Parameters:
    ----------
    var : dict
        dict of input_var, include shape and dtype,
        dtype support int8, uint8, int32, float16, float32
    value : dict
        dict of input_value, include shape and dtype,
        dtype support int8, uint8, int32, float16, float32.
        Must have the same shape and dtype as input_var
    out : dict
        dict of out
    kernel_name : str
        cce kernel name, default value is "assign_sub"

    Returns
    -------
    None
    """
    # get the shape and dtype
    shape_var = var.get("shape")
    shape_value = value.get("shape")
    dtype_var = var.get("dtype")
    dtype_value = value.get("dtype")

    # check whether the shape is right
    check_shape(shape_var, param_name="var")
    check_shape(shape_value, param_name="value")
    if not operator.eq(shape_var, shape_value):
        raise RuntimeError("all input shapes must be equal")

    # check whether dtypes are fp16, fp32, int8, uint8, int32
    # and whether they are the same
    check_list = ("float16", "float32", "int8", "uint8", "int32")
    check_dtype(dtype_var, check_list, param_name="var")
    check_dtype(dtype_value, check_list, param_name="value")
    dtype_var = dtype_var.lower()
    dtype_value = dtype_value.lower()
    if dtype_var != dtype_value:
        raise RuntimeError("all input dtypes must be the same")

    shape, _ = refine_shape_axes(shape_var, [])
    data_var = tvm.placeholder(shape, dtype=dtype_var, name='data_var')
    data_value = tvm.placeholder(shape, dtype=dtype_value, name='data_value')
    sch, res = _assign_sub_compute(data_var, data_value, out, kernel_name)

    with set_bool_storage_config():
        tvm.build(sch, [data_var, data_value, res], "cce", name=kernel_name)


def check_shape_dtype_format(input_shape, input_dtype, input_format, stride_h,
                             stride_w):
    """
    input_shape : shape of the input dict
    input_dtype : input dtype
    input_format : input format, NC1HWC0

    The common check rule for tensor shape, just for 5HD.
    """
    op_utils.check_shape(input_shape)
    if len(input_shape) != DIM_5HD:
        error_info = {}
        error_info['errCode'] = 'E80012'
        error_info['opname'] = 'upsample'
        error_info['expect_value'] = '5'
        error_info['real_value'] = str(len(input_shape))
        raise RuntimeError(
            error_info,
            "In op[%s], the num of dimensions of input[%s] should be [%s], "
            "but actually is [%s]." %
            (error_info['opname'], 'x', error_info['expect_value'],
             error_info['real_value']))

    n, c1, h, w, c0 = input_shape
    op_utils.check_shape([n, c1, h * stride_h, w * stride_w, c0])

    product = tbe_platform.cce_conf.get_soc_spec("SOC_VERSION")
    product_list = ["Hi3796CV300ES", "Hi3796CV300CS"]
    if product in product_list:
        check_list = ["float16"]
    else:
        check_list = ["float16", "float32"]
    if input_dtype not in check_list:
        error_info = {}
        error_info['errCode'] = 'E80006'
        error_info['opname'] = 'upsample'
        error_info['tensor_name'] = 'x'
        error_info['excepted_dtype_list'] = str(check_list)
        error_info['dtype'] = str(input_dtype)
        raise RuntimeError(
            error_info,
            "In op[%s], the input[%s]'s dtype should be one of [%s], "
            "but actually is [%s]." %
            (error_info['opname'], 'x', str(check_list), str(input_dtype)))

    shape_c0 = C0
    if input_shape[DIM_5HD - 1] != shape_c0:
        raise RuntimeError("The value of C0 must be 16")

    if input_format != "NC1HWC0":
        error_info = {}
        error_info['errCode'] = 'E80015'
        error_info['opname'] = 'upsample'
        error_info['tensor_name'] = 'x'
        error_info['excepted_format_list'] = "NC1HWC0"
        error_info['format'] = str(input_format)
        raise RuntimeError(
            error_info,
            "In op[%s], the input[%s]'s format should be [%s], "
            "but actually is [%s]." %
            (error_info['opname'], 'x', "NC1HWC0", str(input_format)))


def relu6_grad(input_grad, input_x, output_y, kernel_name="relu6_grad"):
    """
    Parameters
    ----------
    input_grad : dict
        shape and dtype of input_grad
    input_x : dict
        shape and dtype of input_x
    output_y : dict
        shape and dtype of output, should be same shape and type as input
    kernel_name : str
        cce kernel name, default value is "relu6_grad"

    Returns
    -------
    None
    """
    # check input shape
    shape_x = input_x.get("shape")
    shape_grad = input_grad.get("shape")
    op_utils.check_shape(shape_x, param_name="input_x")
    op_utils.check_shape(shape_grad, param_name="input_grad")
    if list(shape_x) != list(shape_grad):
        raise RuntimeError("input_grad and input_x must have the same shape.")

    # check input tensor data_type and kernel_name
    check_list = ("float16", "float32")
    input_dtype = input_x.get("dtype").lower()
    grad_dtype = input_grad.get("dtype").lower()
    op_utils.check_dtype(input_dtype, check_list, param_name="input_x")
    op_utils.check_dtype(grad_dtype, check_list, param_name="input_grad")
    if input_dtype == "float32" and not tbe_platform.cce_conf.api_check_support(
            "te.lang.cce.vmuls", "float32"):
        raise RuntimeError(
            "Input dtype is float32, but the platform only supports float16")

    shape_x = [reduce_ins(lambda x, y: x * y, shape_x[:])]
    input_data_original = tvm.placeholder(shape_x,
                                          name="input_data",
                                          dtype=input_dtype)
    input_grad = tvm.placeholder(shape_x, name="input_grad", dtype=grad_dtype)

    final_res = relu6_grad_compute(input_grad,
                                   input_data_original,
                                   output_y,
                                   kernel_name="relu6_grad")
    with tvm.target.cce():
        auto_sch = generic.auto_schedule(final_res)

    config = {
        "name": kernel_name,
        "tensor_list": (input_grad, input_data_original, final_res)
    }
    te.lang.cce.cce_build_code(auto_sch, config)


def elu_grad(grads, activations, y, kernel_name="elu_grad"):
    """
    do element-wise elu_grad operation

    Parameters:
    ----------
    grads : the dict of gradient input, only support float16, float32
    activations : the dict of activation input, only support float16, float32
    y : the dict of output
    kernel_name : cce kernel name, default value is "cce_elu_grad"

    Returns
    -------
    None
    """
    shape_gradient = grads.get("shape")
    shape_activation = activations.get("shape")
    dtype_gradient = grads.get("dtype")
    dtype_activation = activations.get("dtype")

    check_shape(shape_gradient, param_name="grads")
    check_shape(shape_activation, param_name="activations")
    if not operator.eq(shape_gradient, shape_activation):
        raise RuntimeError("all input shape must be equal")
    shape_gradient, _ = refine_shape_axes(shape_gradient, [])
    shape_activation, _ = refine_shape_axes(shape_activation, [])

    check_list = ("float16", "float32")
    check_dtype(dtype_gradient, check_list, param_name="grads")
    check_dtype(dtype_activation, check_list, param_name="activations")
    if dtype_gradient.lower() != dtype_activation.lower():
        raise RuntimeError("all input dtype must be same")
    dtype = dtype_gradient.lower()

    data_gradient = tvm.placeholder(shape_gradient,
                                    dtype=dtype,
                                    name="data_gradient")
    data_activation = tvm.placeholder(shape_activation,
                                      dtype=dtype,
                                      name="data_activation")
    res = elu_grad_compute(data_gradient, data_activation, y, kernel_name)

    with tvm.target.cce():
        auto_sch = topi.generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "print_ir": False,
        "tensor_list": [data_gradient, data_activation, res]
    }
    te.lang.cce.cce_build_code(auto_sch, config)


def lp_loss(predict, label, y, p, reduction="mean", kernel_name="lp_loss"):
    """
    :param predict: dict
        shape and dtype of input
    :param label: dict
        shape and dtype of label, should be same shape and type as predict
    :param y: dict
        shape and dtype of y, should be same shape and type as predict
    :param p: int
        decides which loss to compute, now the p only can be 1 to compute l1_loss
    :param reduction: str
        reduce mode, can be 'mean', 'sum' or 'none'
    :param kernel_name: kernel name, default value is "lp_loss"
    :return: None
    """
    predict_shape = predict.get("shape")
    predict_dtype = predict.get("dtype").lower()
    label_shape = label.get("shape")
    label_dtype = label.get("dtype").lower()

    dtype_list = ["float16", "float32"]
    reduction_list = ["none", "mean", "sum"]

    op_utils.check_dtype(predict_dtype, dtype_list)
    op_utils.check_dtype(label_dtype, dtype_list)
    op_utils.check_shape(predict_shape)
    op_utils.check_shape(label_shape)
    util.compare_tensor_dict_key(predict, label, "shape")
    util.compare_tensor_dict_key(predict, label, "dtype")

    if p != 1:
        raise RuntimeError("lp_loss only supports l1_loss")
    if reduction not in reduction_list:
        raise RuntimeError("reduction should be one of ['none','mean','sum']")

    predict_data = tvm.placeholder(predict_shape,
                                   dtype=predict_dtype,
                                   name="predict_data")
    label_data = tvm.placeholder(label_shape,
                                 dtype=label_dtype,
                                 name="label_data")

    res = lp_loss_compute(predict_data, label_data, p, reduction, kernel_name)

    with tvm.target.cce():
        schedule = generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "tensor_list": [predict_data, label_data, res]
    }
    te.lang.cce.cce_build_code(schedule, config)


def acos_grad(y, dy, z, kernel_name="acos_grad"):
    """
    do element-wise acos_grad operation between two input tensors

    Parameters:
    ----------
    y : dict of y, include shape and dtype, dtype support float16, float32
    dy : dict of dy, include shape and dtype, dtype support float16, float32
    z : dict of z, include shape and dtype, dtype support float16, float32
    kernel_name : cce kernel name, default value is "acos_grad"

    Returns
    -------
    None
    """
    # get the shape and dtype for input_1, input_2
    shape_y = y.get("shape")
    shape_dy = dy.get("shape")
    dtype = y.get("dtype")
    dtype1 = dy.get("dtype")

    check_shape(shape_y, param_name="y")
    check_shape(shape_dy, param_name="dy")
    shape_y, _ = refine_shape_axes(shape_y, [])
    shape_dy, _ = refine_shape_axes(shape_dy, [])

    # raise RuntimeError if the input parameters are invalid
    check_list = ("float16", "float32")
    check_dtype(dtype, check_list, param_name="y")
    check_dtype(dtype1, check_list, param_name="dy")
    dtype = dtype.lower()
    dtype1 = dtype1.lower()
    if not operator.eq(shape_y, shape_dy):
        raise RuntimeError(
            "acos_grad only supports inputs whose shapes are equal")
    if dtype != dtype1:
        raise RuntimeError(
            "acos_grad only supports inputs whose dtypes are equal")

    data_y = tvm.placeholder(shape_y, dtype=dtype, name="data1")
    data_dy = tvm.placeholder(shape_dy, dtype=dtype, name="data2")

    res = acos_grad_compute(data_y, data_dy, z, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": (data_y, data_dy, res)}
    te.lang.cce.cce_build_code(sch, config)


def atan2(x1, x2, y, kernel_name="atan2"):
    """
    Algorithm: arctan2
        arctan2(y, x) = arctan(y / x)
    ----------------------------------
    Parameters:

        x1: the dict of input data x1, only support float16, float32.
        x2: the dict of input data x2, only support float16, float32.
        y: the dict of output
        kernel_name: default value is "atan2".
    ----------------------------------
    Returns:
        None
    """
    y_shape = x1.get("shape")
    x_shape = x2.get("shape")
    y_dtype = x1.get("dtype")
    x_dtype = x2.get("dtype")

    check_shape(y_shape, param_name="x1")
    check_shape(x_shape, param_name="x2")

    shape_y, shape_x, shape_max = broadcast_shapes(y_shape,
                                                   x_shape,
                                                   param_name_input1="x1",
                                                   param_name_input2="x2")

    check_list = ("float16", "float32")
    check_dtype(y_dtype, check_list, param_name="x1")
    check_dtype(x_dtype, check_list, param_name="x2")
    if y_dtype.lower() != x_dtype.lower():
        raise RuntimeError("The input tensor must have identical dtype!")

    shape_y, shape_x = refine_shapes_for_broadcast(shape_y, shape_x)
    input_y = tvm.placeholder(shape_y, dtype=y_dtype.lower(), name="input_y")
    input_x = tvm.placeholder(shape_x, dtype=x_dtype.lower(), name="input_x")

    res = atan2_compute(input_y, input_x, y, kernel_name)
    res = te.lang.cce.cast_to(res, x_dtype.lower())

    with tvm.target.cce():
        auto_sch = topi.generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "tensor_list": (input_y, input_x, res),
        "print_ir": False,
        "bool_storage_as_1bit": False
    }
    te.lang.cce.cce_build_code(auto_sch, config)


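# A minimal NumPy reference (illustrative only, not the TBE kernel) for the
# formula in the docstring above. Note that the full arctan2 also selects the
# correct quadrant from the signs of both inputs, which np.arctan2 handles;
# arctan(y / x) alone is only valid for x > 0. `atan2_reference` is a
# hypothetical helper name used only for this sketch.
import numpy as np

def atan2_reference(y, x):
    return np.arctan2(y, x)

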
def asin_grad(y, dy, z, kernel_name="asin_grad"):
    """
    do element-wise asin_grad operation between two input tensors

    Parameters:
    ----------
    y : dict of y, include shape and dtype, dtype support float16, float32
    dy : dict of dy, include shape and dtype, dtype support float16, float32
    z : dict of output
    kernel_name : cce kernel name, default value is "asin_grad"

    Returns
    -------
    None
    """
    # get the shape and dtype
    shape_y = y.get("shape")
    shape_dy = dy.get("shape")
    dtype_y = y.get("dtype")
    dtype_dy = dy.get("dtype")

    # check whether the shape is right
    check_shape(shape_y, param_name="y")
    check_shape(shape_dy, param_name="dy")
    if not operator.eq(shape_y, shape_dy):
        raise RuntimeError("all input shape must be the same")
    shape_y, _ = refine_shape_axes(shape_y, [])
    shape_dy, _ = refine_shape_axes(shape_dy, [])

    # check whether dtypes are fp16, fp32 and whether they are the same
    check_list = ("float16", "float32")
    check_dtype(dtype_y, check_list, param_name="y")
    check_dtype(dtype_dy, check_list, param_name="dy")
    dtype_y = dtype_y.lower()
    if dtype_y != dtype_dy.lower():
        raise RuntimeError("all input dtype must be same")

    # get 2 input tensors: data_y, data_dy
    data_y = tvm.placeholder(shape_y, name="data_y", dtype=dtype_y)
    data_dy = tvm.placeholder(shape_y, name="data_dy", dtype=dtype_y)
    res = asin_grad_compute(data_y, data_dy, z, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": [data_y, data_dy, res]}
    te.lang.cce.cce_build_code(sch, config)


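# The docstring above does not spell out the gradient formula; the standard
# derivative is d(asin(x))/dx = 1 / sqrt(1 - x^2), so a plausible NumPy
# reference for the backward pass (illustrative only, not the TBE kernel,
# with the hypothetical helper name `asin_grad_reference`) is:
import numpy as np

def asin_grad_reference(x, grad):
    return grad / np.sqrt(1.0 - np.square(x))

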
def input_param_check(self, profile):
    """
    check if the inputs are valid

    Parameters
    ----------
    profile : Dprofile, ai_core profile explanation

    Returns
    -------
    None
    """
    product_name = tbe_platform.cce_conf.get_soc_spec("SOC_VERSION")
    if product_name in ("Ascend310", "Ascend910", "Hi3796CV300ES",
                        "Hi3796CV300CS"):
        op_utils.check_dtype(self.dtype.lower(), ["float16"],
                             param_name="input_x")
        op_utils.check_dtype(self.y_dtype.lower(), ["float16"],
                             param_name="input_y")
    else:
        op_utils.check_dtype(self.dtype.lower(), ["float16", "float32"],
                             param_name="input_x")
        op_utils.check_dtype(self.y_dtype.lower(), ["float16", "float32"],
                             param_name="input_y")
    if self.dtype != self.y_dtype:
        raise RuntimeError("dtype in x and y must be equal")

    op_utils.check_shape(self.x_shape, param_name="input_x")
    op_utils.check_shape(self.y_shape, param_name="input_y")

    # x must be 4D, NCHW
    if len(self.x_shape) != DIGIT_4:
        raise RuntimeError("input params check error,"
                           " x shape must be 4D: NCHW")
    if len(self.y_shape) != DIGIT_5:
        raise RuntimeError("input params check error, y shape must be 5HD")

    if self.group_size >= DIGIT_128:
        raise RuntimeError("input params check error,"
                           " group_size must be less than 128")

    calc_c = self.output_dim * self.group_size * self.group_size
    if self.x_shape[1] != calc_c and \
            self.x_shape[1] != align_value(calc_c, C0):
        raise RuntimeError(
            "input_param_check, input fm channel number"
            " does not match layer parameters,", calc_c)

    if self.x_shape[0] != self.y_shape[0] or \
            self.x_shape[2] != self.y_shape[2] or \
            self.x_shape[3] != self.y_shape[3] or self.y_shape[1] != \
            ceil_value(self.output_dim, C0) * self.group_size * self.group_size:
        raise RuntimeError("input params check error,"
                           " x shape and y shape do not match")


def depthwise_weight_4d_2_6d(x,
                             y,
                             src_format,
                             dst_format,
                             kernel_name="depthwise_weight_4d_2_6d"):
    """Operation and Schedule for depthwise_weight_4d_2_6d.

    Parameters
    ----------
    x : shape and dtype of input, the dtype support float16, float32,
        int32, uint16.
    y : the shape and dtype of outputs, the dtype same as input.
    src_format : the source data_format
    dst_format : the target data_format
    kernel_name : cce kernel name, default value is "depthwise_weight_4d_2_6d"

    Returns
    -------
    convert HWCN to C1HWNCoC0
    """
    if src_format.lower() != "hwcn":
        raise RuntimeError("src_format must be HWCN!")

    if dst_format.lower() != "c1hwncoc0":
        raise RuntimeError("dst_format must be C1HWNCoC0!")

    input_shape = x.get("shape")
    dtype = x.get("dtype")
    op_utils.check_shape(input_shape, param_name="x")
    check_list = ("float16", "float32", "int32", "uint16")
    dtype = dtype.lower()
    op_utils.check_dtype(dtype, check_list, param_name="x")

    input_data = tvm.placeholder(input_shape, name="input_data", dtype=dtype)

    four2six = _Four2SixParam(input_shape)

    res = tvm.extern(
        [four2six.get_out_shape()], [input_data],
        lambda ins, outs: _intrin_factor(four2six, dtype, ins, outs),
        name="res",
        dtype=dtype)

    sch = tvm.create_schedule(res.op)
    build_list = [input_data, res]
    with build_config:
        tvm.build(sch, build_list, "cce", name=kernel_name)


def depthwise_weight_6d_2_4d(x,
                             y,
                             src_format,
                             dst_format,
                             kernel_name="depthwise_weight_6d_2_4d"):
    """Operation and Schedule for depthwise_weight_6d_2_4d.

    Parameters
    ----------
    x : shape and dtype of input, the dtype support float16, float32,
        int32, uint16.
    y : the shape and dtype of outputs, the dtype same as input.
    src_format : the source data_format
    dst_format : the target data_format
    kernel_name : cce kernel name, default value is "depthwise_weight_6d_2_4d"

    Returns
    -------
    convert C1HWNCoC0 to HWCN
    """
    _check_parameters(x, y, src_format, dst_format)
    output_shape = y.get("shape")
    channel_size = output_shape[2]
    input_shape = x.get("shape")
    dtype = x.get("dtype")
    channel_4d = channel_size
    op_utils.check_shape(input_shape, param_name="x")

    check_list = ("float16", "float32", "int32", "uint16")
    dtype = dtype.lower()
    op_utils.check_dtype(dtype, check_list, param_name="x")

    input_data = tvm.placeholder(input_shape, name="input_data", dtype=dtype)

    six2four = _Six2FourParam(input_shape, channel_4d)

    res = tvm.extern(
        [six2four.get_out_shape()], [input_data],
        lambda ins, outs: _intrin_factor(six2four, dtype, ins, outs),
        name="res",
        dtype=dtype)

    sch = tvm.create_schedule(res.op)
    build_list = [input_data, res]
    with build_config:
        tvm.build(sch, build_list, "cce", name=kernel_name)


def sigmoid_grad(x, y, z, kernel_name="sigmoid_grad"):
    """
    do sigmoid grad

    sigmoid_grad = (sigmoid - sigmoid*sigmoid)*grad

    Parameters:
    ----------
    x : dictionary shape of sigmoid input
    y : dictionary shape of grad
    z : dictionary output
    kernel_name : cce kernel name, default value is "sigmoid_grad_cce"

    Returns
    -------
    None
    """
    shape_sig = x.get("shape")
    shape_d = y.get("shape")
    dtype = x.get("dtype")
    dtype_y = y.get("dtype")
    if dtype != dtype_y:
        raise RuntimeError("Input dtype must be equal")
    if not operator.eq(list(shape_sig), list(shape_d)):
        raise RuntimeError("Input shapes must be equal")
    op_utils.check_shape(shape_sig, param_name="x")

    input_dtype = dtype.lower()
    op_utils.check_dtype(input_dtype, ("float16", "float32"), param_name="x")

    shape_sig = [reduce_ins(lambda x, y: x * y, shape_sig[:])]
    input_sigmoid = tvm.placeholder(shape_sig,
                                    name="input_sigmoid",
                                    dtype=input_dtype)
    input_grad = tvm.placeholder(shape_sig,
                                 name="input_grad",
                                 dtype=input_dtype)

    with tvm.target.cce():
        res = sigmoid_grad_compute(input_sigmoid, input_grad, z, kernel_name)
        auto_sch = topi.generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "tensor_list": [input_sigmoid, input_grad, res]
    }
    te.lang.cce.cce_build_code(auto_sch, config)


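# A minimal NumPy reference (illustrative only, not the TBE kernel) for the
# formula in the docstring above: given s = sigmoid(x) and the incoming
# gradient, the backward value is (s - s*s) * grad. `sigmoid_grad_reference`
# is a hypothetical helper name used only for this sketch.
import numpy as np

def sigmoid_grad_reference(sigmoid_out, grad):
    return (sigmoid_out - sigmoid_out * sigmoid_out) * grad

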
def fill_d(value, y, dims, kernel_name="fill_d"):
    """
    do fill operation

    Parameters:
    ----------
    value : the dict of input value, include shape and dtype,
        dtype support int8, uint8, int32, float16, float32
    y : the dict of output
    dims : the output shape, type support int32
    kernel_name : cce kernel name, default value is "fill_d"

    Returns
    -------
    None
    """
    # get the shape and dtype
    shape_value = value.get("shape")
    dtype_value = value.get("dtype")

    # check whether the shape is right
    check_shape(dims, param_name="dims")
    check_shape(shape_value, param_name="value")

    # check whether dtypes are right
    check_list_value = ("int8", "uint8", "int32", "float16", "float32")
    check_dtype(dtype_value, check_list_value, param_name="value")

    # get 2 input tensors: data_dims, data_value
    compatible_shape_in = _check_shape_compatibility(shape_value, dims)
    dtype_value = dtype_value.lower()
    data_value = tvm.placeholder(compatible_shape_in,
                                 dtype=dtype_value,
                                 name="data_value")

    res = _fill_compute(data_value, y, dims, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "tensor_list": (data_value, res),
        "print_ir": False
    }
    te.lang.cce.cce_build_code(sch, config)


def asinh(input_x, output_y, kernel_name="asinh"):
    """
    algorithm: asinh(x) = log(x + sqrt(x^2 + 1))

    Parameters
    ----------
    input_x : the dict of input_x, only support float16, float32
    output_y : the dict of output_y
    kernel_name : cce kernel name, default value is "asinh"

    Returns
    -------
    None
    """
    shape_input = input_x.get("shape")
    dtype_input = input_x.get("dtype")

    check_shape(shape_input, param_name="input_x")
    shape_input, _ = refine_shape_axes(shape_input, [])
    check_list = ("float16", "float32")
    check_dtype(dtype_input, check_list, param_name="input_x")

    inp_dtype = dtype_input.lower()
    shape_input = (functool_reduce(lambda x, y: x * y, shape_input),)
    data_input = tvm.placeholder(shape_input,
                                 dtype=inp_dtype,
                                 name="data_input")

    with tvm.target.cce():
        if tbe_platform.cce_conf.api_check_support(
                "te.lang.cce.vlog", "float32") or not \
                tbe_platform.cce_conf.api_check_support(
                    "te.lang.cce.vrec", "float32"):
            res = asinh_compute_cloud(data_input, output_y, kernel_name)
        else:
            res = asinh_compute_mini(data_input, output_y, kernel_name)
        sch = generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "tensor_list": [data_input, res],
        "bool_storage_as_1bit": False
    }
    te.lang.cce.cce_build_code(sch, config)


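# A minimal NumPy reference (illustrative only, not the TBE compute) for the
# formula in the docstring above: asinh(x) = log(x + sqrt(x^2 + 1)).
# `asinh_reference` is a hypothetical helper name used only for this sketch.
import numpy as np

def asinh_reference(x):
    return np.log(x + np.sqrt(np.square(x) + 1.0))

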
def check_grad_param(grad_dic):
    """
    check that the grad parameter is valid

    Parameters
    ----------
    grad_dic : dict, shape and datatype, datatype supports float32

    Returns
    -------
    None
    """
    grad_dtype = grad_dic.get("dtype").lower()
    grad_shape = grad_dic.get("shape")
    op_utils.check_shape(grad_shape)
    op_utils.check_dtype(grad_dtype, ["float32"])


def check_indices_param(indices_dic):
    """
    check that the indices parameter is valid

    Parameters
    ----------
    indices_dic : dict, shape and datatype, datatype supports int32

    Returns
    -------
    None
    """
    indices_dtype = indices_dic.get("dtype").lower()
    indices_shape = indices_dic.get("shape")
    op_utils.check_shape(indices_shape)
    op_utils.check_dtype(indices_dtype, ["int32"])


def relu6_d(input_x, output_y, scale=1.0, kernel_name="relu6_d"):
    """
    f(x) = 6    (x >= 6)
    f(x) = 0    (x <= 0)
    f(x) = x    (0 < x < 6)

    Parameters
    ----------
    input_x : dict
        shape and dtype of input_x
    output_y : dict
        shape and dtype of output_y, should be same shape and type as input
    scale : float
        optional attribute, default value is 1.0
    kernel_name : str
        cce kernel name, default value is "relu6_d"

    Returns
    -------
    None
    """
    input_shape = util.scalar2tensor_one(input_x.get("shape"))
    input_dtype = input_x.get("dtype").lower()
    op_utils.check_shape(input_shape, param_name="input_x")

    vmaxs_support = tbe_platform.cce_conf.api_check_support(
        "te.lang.cce.vmaxs", "float32")
    if input_dtype == "float32" and not vmaxs_support:
        raise RuntimeError(
            "Input dtype is float32, but it is not supported on this platform")

    # check input tensor data_type
    check_list = ("int32", "float16", "float32")
    op_utils.check_dtype(input_dtype, check_list, param_name="input_x")

    input_shape = [reduce_ins(lambda x, y: x * y, input_shape[:])]
    input_data = tvm.placeholder(input_shape,
                                 name="input_data",
                                 dtype=input_dtype)
    final_res = relu6_d_compute(input_data,
                                output_y,
                                scale,
                                kernel_name=kernel_name)
    with tvm.target.cce():
        auto_sch = topi.generic.auto_schedule(final_res)

    config = {"name": kernel_name, "tensor_list": (input_data, final_res)}
    te.lang.cce.cce_build_code(auto_sch, config)


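# A minimal NumPy reference (illustrative only, not the TBE compute) for the
# piecewise definition in the docstring above: clamp the input to [0, 6].
# How `scale` interacts with the clamp is not specified here, so the sketch
# omits it. `relu6_reference` is a hypothetical helper name for this example.
import numpy as np

def relu6_reference(x):
    return np.minimum(np.maximum(x, 0.0), 6.0)

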
def relu_v2(x, y, mask, kernel_name="relu_v2"):
    """
    Algorithm: relu_v2
        relu_v2(x) = (x, 1) when x > 0, else (0, 0)

    Parameters
    ----------
    x : the dict of input data, support float16, float32, int8, int32, uint8
    y : the dict of output
    mask : the dict of mask_output
    kernel_name : cce kernel name, default value is "relu_v2"

    Returns
    -------
    None
    """
    shape = x.get("shape")
    dtype = x.get("dtype")

    check_shape(shape, param_name="x")

    if shape[-1] % 8 != 0:
        raise RuntimeError("the last axis of the shape must be divisible by 8")

    check_list = ("float16", "float32", "int8", "int32", "uint8")
    check_dtype(dtype, check_list, param_name="x")
    dtype = dtype.lower()

    input_data = tvm.placeholder(shape, dtype, "input_data")

    with tvm.target.cce():
        res, res_mask = relu_v2_compute(input_data, y, mask, kernel_name)
        sch = generic.auto_schedule([res, res_mask])

    config = {
        "name": kernel_name,
        "tensor_list": [input_data, res, res_mask],
        "print_ir": False
    }
    te.lang.cce.cce_build_code(sch, config)


def check_shape_1(shape_1):
    """
    check the shape for x1

    Parameters
    ----------
    shape_1 : list or tuple
        shape for x1

    Returns
    -------
    None
    """
    op_utils.check_shape(shape_1, param_name="x1")
    op_utils.check_shape(shape_1, min_rank=4, max_rank=4, param_name="x1")


def softplus_v2(x, y, beta=1.0, threshold=20.0, kernel_name="softplus_v2"):
    """
    Computes the softplus operation with attributes beta and threshold.
    The output: log(1 + exp(beta * x)) / beta if x/beta <= threshold else x.

    Parameters
    ----------
    x : dict
        The input_features passed as input to the corresponding softplus
        operation. Source data type support "float16", "float32".
    y : dict
        data of output.
    beta : float16/float32, optional, default: 1.0
    threshold : float16/float32, optional, default: 20.0
    kernel_name : str
        kernel name, default value is "softplus_v2".

    Returns
    -------
    None
    """
    shape_feature = x.get("shape")
    dtype_feature = x.get("dtype")
    dtype_output = y.get("dtype")

    # check dtype and shape
    check_list = ("float16", "float32")
    check_dtype(dtype_feature, check_list, param_name="x")
    check_dtype(dtype_output, check_list, param_name="y")
    check_shape(shape_feature, param_name="x")

    if beta == 0.0:
        raise ZeroDivisionError("the value of beta must be non-zero")

    data_features = tvm.placeholder(shape_feature,
                                    dtype=dtype_feature,
                                    name="data_features")

    res = softplus_v2_compute(data_features, beta, threshold, kernel_name)

    # auto schedule
    with tvm.target.cce():
        schedule = generic.auto_schedule(res)

    # operator build
    config = {"name": kernel_name, "tensor_list": [data_features, res]}
    te.lang.cce.cce_build_code(schedule, config)


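# A minimal NumPy reference (illustrative only, not the TBE compute) for the
# definition in the docstring above: log(1 + exp(beta * x)) / beta when the
# threshold condition holds, otherwise pass x through unchanged. The docstring
# states the condition as "x/beta <= threshold" and this sketch follows that
# text. `softplus_v2_reference` is a hypothetical helper name for this example.
import numpy as np

def softplus_v2_reference(x, beta=1.0, threshold=20.0):
    return np.where(x / beta <= threshold,
                    np.log1p(np.exp(beta * x)) / beta,
                    x)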