Python compare_tensor_dict_key示例，topi.cce.util.compare_tensor_dict_key Python示例

示例#1

0

显示文件

文件： bn_infer_grad.py 项目： gekowa/ascend-opp

def bn_infer_grad(grads, scale, batch_variance,
                  x_backprop, epsilon=0.0001,
                  kernel_name="bn_infer_grad"):
    """
    Validate the inputs of bn_infer_grad and build its kernel.

    algorithm: fused_batch_norm_grad_v2

    Parameters
    ----------
    grads: dict
        description of the input gradients (a 5D tensor); dtype must be
        float16 or float32.
    scale: dict
        description of the input scale (a 5D tensor); dtype must be float32.
    batch_variance: dict
        description of the input batch variance (a 5D tensor); dtype must be
        float32. Its "shape" entry is compared against the one of `scale`.
    x_backprop: dict
        description of the output tensor, forwarded to the compute function.
    epsilon: float
        a small float number added to the variance of x.
        Defaults to `0.0001`.
    kernel_name: str
        kernel name, default value is "bn_infer_grad"

    Returns
    -------
    None
    """
    grads_shape = grads.get("shape")
    scale_shape = scale.get("shape")
    variance_shape = batch_variance.get("shape")

    grads_dtype = grads.get("dtype").lower()
    scale_dtype = scale.get("dtype").lower()
    variance_dtype = batch_variance.get("dtype").lower()

    # (dtype, supported dtypes, parameter name) - checked in this order.
    dtype_rules = (
        (grads_dtype, ("float32", "float16"), "grads"),
        (scale_dtype, ("float32",), "scale"),
        (variance_dtype, ("float32",), "batch_variance"),
    )
    for dtype, supported, label in dtype_rules:
        check_dtype(dtype, supported, param_name=label)

    _check_shape(grads_shape, variance_shape)
    util.compare_tensor_dict_key(scale, batch_variance, "shape")

    grads_input = tvm.placeholder(grads_shape,
                                  dtype=grads_dtype,
                                  name="grads_input")
    scale_input = tvm.placeholder(scale_shape,
                                  dtype=scale_dtype,
                                  name="x_input")
    batch_variance_input = tvm.placeholder(variance_shape,
                                           dtype=variance_dtype,
                                           name="batch_variance_input")

    res = bn_infer_grad_compute(grads_input, scale_input,
                                batch_variance_input,
                                x_backprop, epsilon,
                                kernel_name=kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    # Inputs first, result last.
    build_config = {
        "name": kernel_name,
        "tensor_list": [grads_input, scale_input,
                        batch_variance_input, res],
    }
    te.lang.cce.cce_build_code(sch, build_config)

示例#2

0

显示文件

文件： leaky_relu_grad.py 项目： gekowa/ascend-opp

def leaky_relu_grad(g, x, y, negative_slope=0, kernel_name="leaky_relu_grad"):
    """
    Build the kernel for the backpropagation of leaky_relu:
    y = gradients(x>0) or negative_slope*gradients(x<=0).
    Supported dtypes: float16, float32.

    Parameters
    ----------
    g : dict
        the backpropagated gradients to the corresponding leaky_relu operation
    x : dict
        the x passed as output of leaky_relu operation
    y : dict
        the output of leaky_relu back propagation
    negative_slope : float or int
        allow non-zero slope for negative inputs to speed up optimization
    kernel_name : str
        kernel name, default value is "leaky_relu_grad"

    Returns
    -------
    None
    """
    g_shape = g.get("shape")
    x_shape = x.get("shape")
    g_dtype = g.get("dtype").lower()
    x_dtype = x.get("dtype").lower()

    util.check_kernel_name(kernel_name)
    for shape in (g_shape, x_shape):
        util.check_shape_rule(shape)
    for shape in (g_shape, x_shape):
        util.check_tensor_shape_size(shape)

    # The third entry of the produced shapes is size-checked as well.
    produced = util.produce_shapes(g_shape, x_shape)
    util.check_tensor_shape_size(produced[2])

    # check input tensor data_type
    supported_dtypes = ["float16", "float32"]
    for dtype in (g_dtype, x_dtype):
        util.check_dtype_rule(dtype, supported_dtypes)
    util.compare_tensor_dict_key(g, x, "dtype")

    g_shape, x_shape = refine_shapes_for_broadcast(produced[0], produced[1])
    # Both placeholders are created with the dtype of g.
    data_g = tvm.placeholder(g_shape, dtype=g_dtype, name="data_g")
    data_x = tvm.placeholder(x_shape, dtype=g_dtype, name="data_x")
    res = leaky_relu_grad_compute(data_g, data_x, y, negative_slope,
                                  kernel_name)

    with tvm.target.cce():
        schedule = generic.auto_schedule(res)

    te.lang.cce.cce_build_code(
        schedule,
        {"name": kernel_name, "tensor_list": [data_g, data_x, res]})

示例#3

0

显示文件

def rsqrt_grad(input_y, input_dy, output_z, kernel_name="rsqrt_grad"):
    """
    Build the kernel for the backpropagation of rsqrt.
    rsqrt: y = 1 / sqrt(x)
    rsqrt_grad: -1/2 * y**3 * dy

    Parameters
    ----------
    input_y: dict
        dict of input_y, include keys(shape and dtype)
    input_dy: dict
        dict of input_dy, include keys(shape and dtype)
    output_z: dict
        dict of output
    kernel_name: str
        cce kernel name, default value is "rsqrt_grad"

    Returns
    -------
    None
    """
    y_shape = input_y.get("shape")
    y_dtype_raw = input_y.get("dtype")
    dy_shape = input_dy.get("shape")
    dy_dtype_raw = input_dy.get("dtype")

    # Shapes are validated first, then compared between the two inputs.
    check_shape(y_shape, param_name="input_y")
    check_shape(dy_shape, param_name="input_dy")
    util.compare_tensor_dict_key(input_y, input_dy, "shape")

    supported_dtypes = ("float16", "float32", "int32", "int8")
    y_dtype = y_dtype_raw.lower()
    check_dtype(y_dtype, supported_dtypes, param_name="input_y")
    dy_dtype = dy_dtype_raw.lower()
    check_dtype(dy_dtype, supported_dtypes, param_name="input_dy")
    util.compare_tensor_dict_key(input_y, input_dy, "dtype")

    y_refined, dy_refined = refine_shapes_for_broadcast(y_shape, dy_shape)

    data_input_y = tvm.placeholder(y_refined,
                                   dtype=y_dtype,
                                   name="data_input_y")
    data_input_dy = tvm.placeholder(dy_refined,
                                    dtype=dy_dtype,
                                    name="data_input_dy")

    res = rsqrt_grad_compute(data_input_y, data_input_dy, output_z,
                             kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    build_config = {"name": kernel_name,
                    "tensor_list": [data_input_y, data_input_dy, res]}
    te.lang.cce.cce_build_code(sch, build_config)

示例#4

0

显示文件

def correction_mul_grad(dout, x, batch_std, running_std, dx, mul_dx, channel, kernel_name="correction_mul_grad"):
    """
    CorrectionMulGrad op: validate the inputs and build the kernel.

    Parameters
    ----------
    dout : dict
        incoming gradient; reads "shape", "dtype", "format" and, when
        present, "ori_format"
    x : dict
        reads "shape" and "dtype"; both are compared against `dout`
    batch_std : dict
        reads "dtype", "shape" and "ori_shape"
    running_std : dict
        reads "dtype"; its "shape" is compared against `batch_std`
    dx : dict
        output description; its "shape" is compared against `x`
    mul_dx : dict
        output description; its "shape" is compared against `dx`
    channel : int
        index of the channel axis inside the input shape
    kernel_name : str
        kernel name, default value is "correction_mul_grad"

    Returns
    -------
    None

    Raises
    ------
    RuntimeError
        if the data format is neither "NC1HWC0" nor "NCHW", or if the data
        format is NCHW while the original format is not "NCHW".
    """
    shape_dout = dout.get("shape")
    # Read x's own shape; it is compared against dout's shape below.
    shape_x = x.get("shape")

    dtype_dout = dout.get("dtype")
    dtype_x = x.get("dtype")
    dtype_batch_std = batch_std.get("dtype")
    dtype_running_std = running_std.get("dtype")

    inp_dtype_dout = dtype_dout.lower()
    inp_dtype_x = dtype_x.lower()
    inp_dtype_batch_std = dtype_batch_std.lower()
    inp_dtype_running_std = dtype_running_std.lower()

    util.check_dtype_rule(inp_dtype_dout, ("float16", "float32"))
    util.check_dtype_rule(inp_dtype_x, ("float16", "float32"))
    util.check_dtype_rule(inp_dtype_batch_std, ("float16", "float32"))
    util.check_dtype_rule(inp_dtype_running_std, ("float16", "float32"))
    util.compare_tensor_dict_key(dout, x, "dtype")
    util.compare_tensor_dict_key(dout, x, "shape")
    util.compare_tensor_dict_key(dx, x, "shape")
    util.compare_tensor_dict_key(batch_std, running_std, "shape")
    util.compare_tensor_dict_key(dx, mul_dx, "shape")

    util.check_kernel_name(kernel_name)
    util.check_shape_rule(shape_x)
    util.check_shape_size(shape_x, SHAPE_SIZE_LIMIT)

    data_format = dout.get("format")
    # The original format lives under "ori_format". Reading "format" twice
    # made the NCHW consistency check below compare a value with itself.
    # Fall back to the data format when the key is missing so descriptors
    # without "ori_format" keep working as before.
    ori_format = dout.get("ori_format") or data_format
    if data_format.upper() not in ("NC1HWC0", "NCHW"):
        raise RuntimeError("Un supported data format {}".format(data_format))
    if data_format.upper() == "NCHW" and ori_format != "NCHW":
        raise RuntimeError("data_format(NCHW) must same as ori_format")

    # By default the std tensors are all ones except on the channel axis,
    # which takes the first dimension of batch_std's original shape.
    shape_c = [1] * len(shape_x)
    shape_c[channel] = batch_std.get("ori_shape")[0]
    if data_format == "NC1HWC0" and channel == 1:
        # In this case batch_std's own shape is used as-is.
        shape_c = batch_std.get("shape")

    dout_t = tvm.placeholder(shape_dout, name="dout", dtype=inp_dtype_dout)
    x_t = tvm.placeholder(shape_x, name="x", dtype=inp_dtype_x)
    batch_std_t = tvm.placeholder(shape_c, name="batch_std", dtype=inp_dtype_batch_std)
    running_std_t = tvm.placeholder(shape_c, name="running_std", dtype=inp_dtype_running_std)
    res_list = correction_mul_grad_compute(dout_t, x_t, batch_std_t, running_std_t, channel, data_format, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res_list)

    # Inputs first, then every tensor returned by the compute function.
    tensor_list = [dout_t, x_t, batch_std_t, running_std_t] + res_list
    config = {"print_ir": False,
              "name": kernel_name,
              "tensor_list": tensor_list}

    te.lang.cce.cce_build_code(sch, config)

示例#5

0

显示文件

def lp_loss(predict, label, y, p, reduction="mean", kernel_name="lp_loss"):
    """
    Validate the inputs of lp_loss and build its kernel.

    :param predict: dict
        shape and dtype of input
    :param label: dict
        shape and dtype of label, should be same shape and type as predict
    :param y: dict
        shape and dtype of y, should be same shape and type as predict
    :param p: int
        decides which loss to compute, now the p only can be 1 to compute
        l1_loss
    :param reduction: str
        reduce mode, can be 'mean', 'sum' or 'none'
    :param kernel_name: kernel name, default value is "lp_loss"
    :return:
        None
    :raises RuntimeError:
        if p is not 1 or reduction is not a supported mode
    """
    shape_of_predict = predict.get("shape")
    dtype_of_predict = predict.get("dtype").lower()
    shape_of_label = label.get("shape")
    dtype_of_label = label.get("dtype").lower()

    supported_dtypes = ["float16", "float32"]
    supported_reductions = ["none", "mean", "sum"]

    # dtypes first, then shapes, then the pairwise comparisons.
    op_utils.check_dtype(dtype_of_predict, supported_dtypes)
    op_utils.check_dtype(dtype_of_label, supported_dtypes)
    op_utils.check_shape(shape_of_predict)
    op_utils.check_shape(shape_of_label)

    for key in ("shape", "dtype"):
        util.compare_tensor_dict_key(predict, label, key)

    if p != 1:
        raise RuntimeError("lp_loss only supports l1_loss")

    if reduction not in supported_reductions:
        raise RuntimeError("reduction should be one of ['none','mean','sum']")

    predict_data = tvm.placeholder(shape_of_predict,
                                   name="predict_data",
                                   dtype=dtype_of_predict)
    label_data = tvm.placeholder(shape_of_label,
                                 name="label_data",
                                 dtype=dtype_of_label)

    res = lp_loss_compute(predict_data, label_data, p, reduction, kernel_name)

    with tvm.target.cce():
        schedule = generic.auto_schedule(res)

    te.lang.cce.cce_build_code(
        schedule,
        {"name": kernel_name,
         "tensor_list": [predict_data, label_data, res]})

示例#6

0

显示文件

文件： xlogy.py 项目： gekowa/ascend-opp

def xlogy(input_x, input_y, output_z, kernel_name="xlogy"):
    """
    Build the xlogy kernel: res = 0 if x == 0 else x*log(y)

    Parameters
    ----------
    input_x: dict
        dict of input_x, include keys(shape and dtype)
    input_y: dict
        dict of input_y, include keys(shape and dtype)
    output_z: dict
        dict info of output_z
    kernel_name: str
        kernel name, default value is "xlogy"

    Returns
    -------
    None
    """
    x_shape = input_x.get("shape")
    y_shape = input_y.get("shape")
    x_dtype_raw = input_x.get("dtype")
    y_dtype_raw = input_y.get("dtype")

    util.compare_tensor_dict_key(input_x, input_y, "dtype")
    check_shape(x_shape, param_name="input_x")
    check_shape(y_shape, param_name="input_y")

    x_dtype = x_dtype_raw.lower()
    y_dtype = y_dtype_raw.lower()
    supported_dtypes = ("float16", "float32")
    check_dtype(x_dtype, supported_dtypes, param_name="input_x")
    check_dtype(y_dtype, supported_dtypes, param_name="input_y")

    broadcasted = broadcast_shapes(x_shape,
                                   y_shape,
                                   param_name_input1="input_x",
                                   param_name_input2="input_y")
    x_shape, y_shape = refine_shapes_for_broadcast(broadcasted[0],
                                                   broadcasted[1])

    # Both placeholders are created with the dtype of input_x.
    data1 = tvm.placeholder(x_shape, dtype=x_dtype, name="data1")
    data2 = tvm.placeholder(y_shape, dtype=x_dtype, name="data2")
    res = xlogy_compute(data1, data2, output_z, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    build_config = {"name": kernel_name,
                    "tensor_list": [data1, data2, res],
                    "bool_storage_as_1bit": False}
    te.lang.cce.cce_build_code(sch, build_config)

示例#7

0

显示文件

def equal(input_x, input_y, output_z, kernel_name="equal"):
    """
    Build the kernel returning the truth value of (x = y) element-wise.

    Parameters
    ----------
    input_x: dict
        dict of input_x, include keys(shape and dtype)
    input_y: dict
        dict of input_y, include keys(shape and dtype)
    output_z: dict
        dict of output
    kernel_name: str
        cce kernel name, default value is "equal"

    Returns
    -------
    None
    """
    x_shape = input_x.get("shape")
    x_dtype = input_x.get("dtype")
    y_shape = input_y.get("shape")
    y_dtype = input_y.get("dtype")

    # The third value returned by broadcast_shapes is not needed here.
    x_shape, y_shape, _ = broadcast_shapes(x_shape,
                                           y_shape,
                                           param_name_input1="input_x",
                                           param_name_input2="input_y")

    check_shape(x_shape, param_name="input_x")
    check_shape(y_shape, param_name="input_y")

    supported_dtypes = ("float16", "float32", "int32", "int8", "uint8")
    x_dtype = x_dtype.lower()
    check_dtype(x_dtype, supported_dtypes, param_name="input_x")
    y_dtype = y_dtype.lower()
    check_dtype(y_dtype, supported_dtypes, param_name="input_y")
    util.compare_tensor_dict_key(input_x, input_y, "dtype")

    x_shape, y_shape = refine_shapes_for_broadcast(list(x_shape),
                                                   list(y_shape))
    data_input_x = tvm.placeholder(x_shape, dtype=x_dtype,
                                   name="data_input_x")
    data_input_y = tvm.placeholder(y_shape, dtype=y_dtype,
                                   name="data_input_y")

    res = equal_compute(data_input_x, data_input_y, output_z, kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    build_config = {"name": kernel_name,
                    "tensor_list": [data_input_x, data_input_y, res]}
    te.lang.cce.cce_build_code(sch, build_config)

示例#8

0

显示文件

def softmax_grad(softmax, grad_softmax, grad_x, kernel_name="softmax_grad"):
    """
    Build the kernel computing softmax gradients for a softmax operation.
    The calculation formula is as follows :
    grad_x = grad_softmax * softmax - sum(grad_softmax * softmax) * softmax

    Parameters
    ----------
    softmax: dict
        shape and dtype of first input, only support float16, float32
    grad_softmax: dict
        shape and dtype of second input, only support float16, float32
    grad_x: dict
        shape and dtype of output data, should be same shape and type as input
    kernel_name: str
        kernel name, default value is "softmax_grad"

    Returns
    -------
    None
    """
    softmax_shape = softmax.get("shape")
    grad_shape = grad_softmax.get("shape")
    softmax_dtype_raw = softmax.get("dtype")

    util.compare_tensor_dict_key(softmax, grad_softmax, "dtype")
    check_shape(softmax_shape, param_name="softmax")
    check_shape(grad_shape, param_name="grad_softmax")

    supported_dtypes = ("float16", "float32")
    input_dtype = softmax_dtype_raw.lower()
    check_dtype(input_dtype, supported_dtypes, param_name="softmax")

    # Only broadcast when the two shapes actually differ.
    if list(softmax_shape) != list(grad_shape):
        softmax_shape, grad_shape, _ = broadcast_shapes(
            softmax_shape,
            grad_shape,
            param_name_input1="softmax",
            param_name_input2="grad_softmax")

    # Both placeholders use the dtype of the softmax input.
    softmax_tensor = tvm.placeholder(softmax_shape,
                                     dtype=input_dtype,
                                     name="softmax")
    grad_tensor = tvm.placeholder(grad_shape,
                                  dtype=input_dtype,
                                  name="grad_softmaxgrad")

    res = softmax_grad_compute(softmax_tensor,
                               grad_tensor,
                               grad_x,
                               kernel_name=kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    build_config = {"name": kernel_name,
                    "tensor_list": [softmax_tensor, grad_tensor, res]}
    te.lang.cce.cce_build_code(sch, build_config)

示例#9

0

显示文件

def reciprocal_grad(input_y,
                    input_dy,
                    output_data,
                    kernel_name="reciprocal_grad"):
    """
    Build the reciprocal_grad kernel: dx = -1*dy*y*y,
    where `y = 1/x`, and `dy` is the corresponding input gradient.

    Parameters
    ----------
    input_y: dict
        shape and dtype of input_y, only support float16, float32, int32, int8
    input_dy: dict
        shape and dtype of input_dy, should be same shape and type as input_y
    output_data: dict
        shape and dtype of output, should be same shape and type as input_y
    kernel_name: str
        kernel name, default value is "reciprocal_grad"

    Returns
    -------
    None
    """
    y_shape = input_y.get("shape")
    dy_shape = input_dy.get("shape")
    y_dtype = input_y.get("dtype").lower()
    dy_dtype = input_dy.get("dtype").lower()

    check_shape(y_shape, param_name="input_y")
    check_shape(dy_shape, param_name="input_dy")

    y_shape = util.shape_refine(y_shape)
    dy_shape = util.shape_refine(dy_shape)

    # The two inputs are compared on both shape and dtype.
    for key in ("shape", "dtype"):
        util.compare_tensor_dict_key(input_y, input_dy, key)

    supported_dtypes = ("float16", "float32", "int32", "int8")
    check_dtype(y_dtype, supported_dtypes, param_name="input_y")

    y_refined, dy_refined = refine_shapes_for_broadcast(y_shape, dy_shape)
    data_dy = tvm.placeholder(dy_refined, dtype=dy_dtype, name="data_dy")
    data_y = tvm.placeholder(y_refined, dtype=y_dtype, name="data_y")

    res = reciprocal_grad_compute(data_y, data_dy, output_data, kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    te.lang.cce.cce_build_code(
        sch,
        {"name": kernel_name, "tensor_list": [data_y, data_dy, res]})

示例#10

0

显示文件

文件： iou.py 项目： gekowa/ascend-opp

def iou(bboxes, gtboxes, overlap, mode="iou", kernel_name="iou"):
    """
    Validate the inputs of the iou operator and delegate to iou_compute.

    Parameters
    ----------
    bboxes : dict
        shape and dtype of bboxes, the coordinates of bbox
        shape must be [n, 4]
        [x1, y1, x2, y2]
    gtboxes : dict
        shape and dtype of gtboxes, the coordinates of bbox
        shape must be [m, 4]
        [x1, y1, x2, y2]
    overlap : dict
        shape and dtype of overlap
        result shape is [m, n]
    mode :  str
        ('iou','iof')
        iou : the output is gtbox and bbox iou
        iof :
    kernel_name : str
        kernel name, default value is "iou"

    Returns
    -------
    The value returned by iou_compute.

    Raises
    ------
    RuntimeError
        if mode is neither "iou" nor "iof"
    """
    shape_of_bboxes = bboxes.get("shape")
    shape_of_gtboxes = gtboxes.get("shape")

    check_shape(shape_of_bboxes, param_name="bboxes")
    check_shape(shape_of_gtboxes, param_name="gtboxes")

    _box_shape_check("bboxes", shape_of_bboxes)
    _box_shape_check("gtboxes", shape_of_gtboxes)

    dtype_of_bboxes = bboxes.get("dtype").lower()
    util.compare_tensor_dict_key(bboxes, gtboxes, "dtype")
    supported_dtypes = ("float16", "float32")
    check_dtype(dtype_of_bboxes, supported_dtypes, param_name="bboxes")

    # check whether mode is valid
    supported_modes = ("iou", "iof")
    if mode not in supported_modes:
        raise RuntimeError("Mode only support iou and iof")

    return iou_compute(bboxes, gtboxes, overlap, mode, kernel_name)

示例#11

0

显示文件

文件： smooth_l1_loss_grad_v2.py 项目： gekowa/ascend-opp

def smooth_l1_loss_grad_v2(predict, label, dout, gradient, sigma=1.0, reduction='mean',
                           kernel_name="smooth_l1_loss_grad_v2"):
    """
    Validate the inputs of smooth_l1_loss_grad_v2 and build its kernel.

    Parameters
    ----------
    predict : dict
        reads "shape" and "dtype"; dtype must be float16 or float32
    label : dict
        reads "shape" and "dtype"; dtype must be float16 or float32,
        "shape" is compared against `predict`
    dout : dict
        reads "shape" and "dtype"; dtype must be float16 or float32,
        "shape" is compared against `predict`
    gradient : dict
        not read by this function
    sigma : float
        forwarded to the compute function, default value is 1.0
    reduction : str
        one of "none", "mean", "sum" (case-insensitive), default "mean"
    kernel_name : str
        kernel name, default value is "smooth_l1_loss_grad_v2"

    Returns
    -------
    None
    """
    # check input: predict label dout
    supported_dtypes = ("float16", "float32")

    shapes = []
    dtypes = []
    for tensor in (predict, label, dout):
        shapes.append(tensor.get("shape"))
        tensor_dtype = tensor.get("dtype").lower()
        util.check_dtype_rule(tensor_dtype, supported_dtypes)
        dtypes.append(tensor_dtype)

    for shape in shapes:
        util.check_shape_rule(shape)

    for other in (label, dout):
        util.compare_tensor_dict_key(predict, other, "shape")

    # check reduction (reuses the dtype rule helper for the membership test)
    supported_reductions = ("none", "mean", "sum")
    reduction_type = reduction.lower()
    util.check_dtype_rule(reduction_type, supported_reductions)

    shape_predict, shape_label, shape_dout = shapes
    dtype_predict, dtype_label, dtype_dout = dtypes

    input_predict = tvm.placeholder(shape_predict, dtype=dtype_predict,
                                    name="predict")
    input_label = tvm.placeholder(shape_label, dtype=dtype_label,
                                  name="label")
    input_dout = tvm.placeholder(shape_dout, dtype=dtype_dout,
                                 name="dout")

    res = smooth_l1_loss_grad_v2_compute(input_predict, input_label, input_dout, sigma, reduction_type)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    build_config = {"name": kernel_name,
                    "tensor_list": [input_predict, input_label,
                                    input_dout, res]}
    te.lang.cce.cce_build_code(sch, build_config)

示例#12

0

显示文件

文件： mod.py 项目： gekowa/ascend-opp

def mod(input_x, input_y, output_z, kernel_name="mod"):
    """
    Build the kernel returning the element-wise remainder of division.

    Parameters
    ----------
    input_x: dict
        input tensor contains shape and dtype attributes.
        source data type support "float16", "float32", "int8", "uint8", "int32".
    input_y: dict
        input tensor contains shape and dtype attributes.
        Must have the same type as 'input_x'.
    output_z: dict
        data of output.
        Must have the same type as 'input_x'.
    kernel_name: str
        kernel name, default value is "mod"

    Returns
    -------
    None
    """
    shape_x = input_x.get("shape")
    shape_y = input_y.get("shape")

    util.compare_tensor_dict_key(input_x, input_y, "dtype")
    check_shape(shape_x, param_name="input_x")
    check_shape(shape_y, param_name="input_y")

    check_list = ("float16", "float32", "int8", "uint8", "int32")
    input_dtype = input_x.get("dtype").lower()
    check_dtype(input_dtype, check_list, param_name="input_x")
    # The third value returned by broadcast_shapes is not needed here.
    shape_x, shape_y, _ = broadcast_shapes(
        shape_x,
        shape_y,
        param_name_input1="input_x",
        param_name_input2="input_y")

    reshape_x, reshape_y = refine_shapes_for_broadcast(shape_x, shape_y)
    # Both placeholders use input_x's dtype; the dtypes were compared above.
    data_x = tvm.placeholder(reshape_x, dtype=input_dtype, name="data_x")
    data_y = tvm.placeholder(reshape_y, dtype=input_dtype, name="data_y")
    # Forward the caller's kernel name instead of the hard-coded "mod", so
    # the compute stage and the build config agree on the name.
    res = mod_compute(data_x, data_y, output_z, kernel_name=kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": [data_x, data_y, res]}

    te.lang.cce.cce_build_code(sch, config)

示例#13

0

显示文件

文件： xdivy.py 项目： gekowa/ascend-opp

def xdivy(input_x, input_y, output_z, kernel_name="xdivy"):
    """
    Build the xdivy kernel: return 0 if x==0 and x/y otherwise, elementwise

    Parameters
    ----------
    input_x: dict
        dict with keys(shape and dtype) of input_x
    input_y: dict
        dict with keys(shape and dtype) of input_y
    output_z: dict
        dict with keys(shape and dtype) of output
    kernel_name : str
        kernel name, default value is "xdivy"

    Returns
    -------
    None
    """
    x_shape = input_x.get("shape")
    x_dtype_raw = input_x.get("dtype")
    y_shape = input_y.get("shape")
    y_dtype_raw = input_y.get("dtype")

    util.compare_tensor_dict_key(input_x, input_y, "dtype")
    check_shape(x_shape, param_name="input_x")
    check_shape(y_shape, param_name="input_y")
    broadcasted = broadcast_shapes(x_shape,
                                   y_shape,
                                   param_name_input1="input_x",
                                   param_name_input2="input_y")

    x_dtype = x_dtype_raw.lower()
    y_dtype = y_dtype_raw.lower()
    supported_dtypes = ("float16", "float32")
    check_dtype(x_dtype, supported_dtypes, param_name="input_x")
    check_dtype(y_dtype, supported_dtypes, param_name="input_y")

    x_refined, y_refined = refine_shapes_for_broadcast(broadcasted[0],
                                                       broadcasted[1])
    # Both placeholders are created with the dtype of input_x.
    data_x = tvm.placeholder(x_refined, name="data_x", dtype=x_dtype)
    data_y = tvm.placeholder(y_refined, name="data_y", dtype=x_dtype)

    res = xdivy_compute(data_x, data_y, output_z, kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    te.lang.cce.cce_build_code(
        sch,
        {"name": kernel_name, "tensor_list": [data_x, data_y, res]})

示例#14

0

显示文件

def _dtype_check(input_x, input_scale, input_offset, input_mean,
                 input_variance, is_training):
    """
    Check that the dtypes of the inputs are in line with norms.

    Parameters
    ----------
    input_x: dict
        dict of input, A 4D Tensor for input data.
    input_scale: dict
        dict of scale,
        A 1D Tensor for scaling factor, to scale the normalized x.
    input_offset: dict
        dict of offset, A 1D Tensor for offset, to shift to the normalized x.
    input_mean: dict
        dict of mean, A 1D Tensor for population mean.
        Used for inference only, must be empty for training.
    input_variance: dict
        dict of variance, A 1D Tensor for population variance.
        Used for inference only, must be empty for training.
    is_training: bool
        A bool value to indicate the operation is for training or inference.

    Returns
    -------
    None
    """
    x_dtype = input_x.get("dtype")
    scale_dtype = input_scale.get("dtype")

    # offset is always compared with scale; mean and variance are compared
    # only when the operation is not in training mode.
    partners = [input_offset]
    if not is_training:
        partners += [input_mean, input_variance]
    for partner in partners:
        util.compare_tensor_dict_key(input_scale, partner, "dtype")

    check_dtype(x_dtype.lower(), ("float16", "float32"),
                param_name="input_x")
    check_dtype(scale_dtype.lower(), ("float32", "float16"),
                param_name="input_scale")

示例#15

0

显示文件

文件： prelu_grad.py 项目： gekowa/ascend-opp

def prelu_grad(input_gradients,
               input_features,
               input_weights,
               output_backprops_dx,
               output_backprops_da,
               kernel_name="prelu_grad"):
    """
    Build the kernel for the backpropagation of the prelu operation.
    prelu equivalent function: prelu(x) =
    max(0, input_features) + input_weights * min(0, input_features)

    so prelu_grad output_backprops:
        output_backprops_dx = input_features > 0
            ? input_gradients : input_weights * input_gradients
        output_backprops_da = input_features > 0
            ? 0 : input_features * input_gradients

    support dtype:float16, float32

    Parameters
    ----------
    input_gradients : dict
        shape and dtype of grad, not support 1D
    input_features : dict
        shape and dtype of input tensor, not support 1D
    input_weights : dict
        shape and dtype of input learning weight
    output_backprops_dx : dict
        shape and dtype of output, should be same shape
         and type as input_features
    output_backprops_da : dict
        shape and dtype of output, should be same shape
         and type as input_features
    kernel_name : str
        kernel name, default value is "prelu_grad"

    Returns
    -------
    None
    """
    shape_input_gradients = input_gradients.get("shape")
    dtype_input_gradients = input_gradients.get("dtype")
    input_gradients_dtype = dtype_input_gradients.lower()
    input_format = input_gradients.get("format")

    shape_input_features = input_features.get("shape")
    dtype_input_features = input_features.get("dtype")
    input_features_dtype = dtype_input_features.lower()

    shape_input_weights = input_weights.get("shape")
    dtype_input_weights = input_weights.get("dtype")
    input_weights_dtype = dtype_input_weights.lower()

    # check dtype
    # Validate the lower-cased dtypes: these are the values the placeholders
    # below are created with, and it matches how the sibling operators call
    # check_dtype.
    check_list = ("float16", "float32")
    util.compare_tensor_dict_key(input_gradients, input_features, "dtype")
    util.compare_tensor_dict_key(input_gradients, input_weights, "dtype")
    check_dtype(input_gradients_dtype,
                check_list,
                param_name="input_gradients")
    check_dtype(input_features_dtype, check_list, param_name="input_features")
    check_dtype(input_weights_dtype, check_list, param_name="input_weights")
    # check shape
    check_shape(shape_input_gradients, param_name="input_gradients")
    check_shape(shape_input_features, param_name="input_features")
    check_shape(shape_input_weights, param_name="input_weights")
    # Only broadcast when gradients and features differ in shape.
    if list(shape_input_gradients) != list(shape_input_features):
        shape_input_gradients, shape_input_features, _ = \
            broadcast_shapes(shape_input_gradients, shape_input_features,
                             param_name_input1="input_gradients",
                             param_name_input2="input_features")
    check_inputs_shape(shape_input_features, shape_input_weights, input_format)

    # Expand the weight shape so it lines up with the feature shape: the
    # weight length goes on axis 1 and every other axis is 1.
    if len(shape_input_features) == 4:
        shape_input_weights = [1, shape_input_weights[0], 1, 1]
    elif input_format == "NC1HWC0" and len(shape_input_weights) == 5:
        # Already a 5-element shape: used as-is.
        pass
    elif input_format == "NC1HWC0" and len(shape_input_weights) == 1 \
            and shape_input_weights[0] != 1:
        # Round the weight length up to a whole number of 16-element groups.
        weights_c1 = (shape_input_weights[0] + 15) // 16
        shape_input_weights = [1, weights_c1, 1, 1, 16]
    else:
        weights_shape = [1 for _ in range(len(shape_input_features))]
        weights_shape[1] = shape_input_weights[0]
        shape_input_weights = weights_shape
    data_input_gradients = tvm.placeholder(shape_input_gradients,
                                           name="data_input_gradients",
                                           dtype=input_gradients_dtype)
    data_input_features = tvm.placeholder(shape_input_features,
                                          name="data_input_features",
                                          dtype=input_features_dtype)
    data_input_weights = tvm.placeholder(shape_input_weights,
                                         name="data_input_weights",
                                         dtype=input_weights_dtype)
    res_dx, res_da = prelu_grad_compute(
        data_input_gradients, data_input_features, data_input_weights,
        output_backprops_dx, output_backprops_da, input_format, kernel_name)
    res = [res_dx, res_da]
    # Inputs first, then both results (dx, da).
    tensor_list = [
        data_input_gradients, data_input_features, data_input_weights
    ] + list(res)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": tensor_list}

    te.lang.cce.cce_build_code(sch, config)

示例#16

0

显示文件

文件： smooth_l1_loss_grad.py 项目： gekowa/ascend-opp

def smooth_l1_loss_grad(predict,
                        label,
                        dout,
                        gradient,
                        sigma=1.0,
                        kernel_name="smooth_l1_loss_grad"):
    """
    Validate the inputs of the smooth L1 loss gradient operator, then
    schedule and build its kernel.

    The arithmetic itself lives in `smooth_l1_loss_grad_compute`; the
    operator is documented as computing
    smooth = x/sigma        if -sigma < x < sigma
             1              if x > sigma
             -1             if x < -sigma
    out = smooth * dout

    Parameters
    ----------
    predict : dict
        input descriptor; its "shape" and "dtype" entries are read.
        dtype must be "float16" or "float32"
    label : dict
        input descriptor; "shape" and "dtype" are compared against predict
    dout : dict
        input descriptor; "shape" and "dtype" are compared against predict
    gradient : dict
        output descriptor, forwarded unchanged to the compute function
    sigma : float
        forwarded unchanged to the compute function
    kernel_name : str
        kernel name, default value is "smooth_l1_loss_grad"

    Returns
    -------
    None
    """
    # (parameter name, descriptor) pairs, in the order they are validated
    inputs = (("predict", predict), ("label", label), ("dout", dout))

    shapes = [tensor.get("shape") for _, tensor in inputs]
    dtypes = [tensor.get("dtype").lower() for _, tensor in inputs]

    # label and dout are each compared with predict: shapes first, then dtypes
    for key in ("shape", "dtype"):
        for other in (label, dout):
            util.compare_tensor_dict_key(predict, other, key)

    supported_dtypes = ("float16", "float32")
    for (name, _), dtype in zip(inputs, dtypes):
        check_dtype(dtype, supported_dtypes, param_name=name)
    for (name, _), shape in zip(inputs, shapes):
        check_shape(shape, param_name=name)

    # every input is flattened to one dimension holding the product of
    # predict's dims
    element_count = functools_reduce(lambda acc, dim: acc * dim,
                                     shapes[0][:])
    flat_shape = (element_count, )

    # all three placeholders use predict's dtype
    predict_input, label_input, dout_input = (
        tvm.placeholder(flat_shape, name=name + "_input", dtype=dtypes[0])
        for name, _ in inputs)

    res = smooth_l1_loss_grad_compute(predict_input, label_input, dout_input,
                                      gradient, sigma, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    build_config = {
        "name": kernel_name,
        "tensor_list": [predict_input, label_input, dout_input, res]
    }
    te.lang.cce.cce_build_code(sch, build_config)

示例#17

0

显示文件

文件： sigmoid_cross_entropy_with_logits_grad.py 项目： gekowa/ascend-opp

def sigmoid_cross_entropy_with_logits_grad(
        predict,
        target,
        dout,
        gradient,
        kernel_name="sigmoid_cross_entropy_with_logits_grad"):
    """
    Validate the operator inputs, then schedule and build the kernel
    produced by `sigmoid_cross_entropy_with_logits_grad_compute`.

    Parameters
    ----------
    predict : dict
        the output of previous layer; "shape" and "dtype" are read
    target : dict
        label; its shape is compared against predict
    dout : dict
        last gradient; its shape is compared against predict
    gradient : dict
        result after compute; only its "dtype" is checked here, the dict
        itself is forwarded to the compute function
    kernel_name : str
        kernel name, default value is "sigmoid_cross_entropy_with_logits_grad"

    Returns
    -------
    None
    """
    supported_dtypes = ("float16", "float32")

    predict_shape = predict.get("shape")
    raw_predict_dtype = predict.get("dtype")
    output_dtype = gradient.get("dtype").lower()
    # lower-cased dtype of every input, keyed by parameter name
    dtypes = {"predict": raw_predict_dtype.lower()}
    check_dtype(output_dtype, supported_dtypes, param_name="gradient")
    check_dtype(dtypes["predict"], supported_dtypes, param_name="predict")
    check_shape(predict_shape, param_name="predict")

    # target and dout get the same dtype-then-shape validation
    for name, tensor in (("target", target), ("dout", dout)):
        tensor_shape = tensor.get("shape")
        dtypes[name] = tensor.get("dtype").lower()
        check_dtype(dtypes[name], supported_dtypes, param_name=name)
        check_shape(tensor_shape, param_name=name)

    for other in (target, dout):
        util.compare_tensor_dict_key(predict, other, "shape")

    # all inputs are flattened to one dimension holding the product of
    # predict's dims; each placeholder keeps its own dtype
    element_count = functools_reduce(lambda acc, dim: acc * dim,
                                     predict_shape[:])
    flat_shape = (element_count, )
    predict_data_input, target_data_input, dout_data_input = (
        tvm.placeholder(flat_shape,
                        name=name + "_data_input",
                        dtype=dtypes[name])
        for name in ("predict", "target", "dout"))

    res = sigmoid_cross_entropy_with_logits_grad_compute(
        predict_data_input, target_data_input, dout_data_input, gradient,
        kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    build_config = {
        "name": kernel_name,
        "tensor_list": [
            predict_data_input, target_data_input, dout_data_input, res
        ]
    }
    te.lang.cce.cce_build_code(sch, build_config)

示例#18

0

显示文件

def smooth_l1_loss_v2(predict,
                      label,
                      loss,
                      sigma=1.0,
                      reduction="mean",
                      kernel_name="smooth_l1_loss_v2"):
    """
    Validate the operator inputs, then schedule and build the kernel
    produced by `smooth_l1_loss_v2_compute`.

    Parameters
    ----------
    predict : dict
        shape and dtype of input
    label : dict
        shape and dtype of input; its shape is compared against predict
    loss : dict
        descriptor of the output; only its "dtype" is read here
    sigma: float
        sigma, default value is 1
    reduction: str
        type of result, one of "none", "mean", "sum" (compared after
        lower-casing), default value is "mean"
    kernel_name : str
        kernel name, default value is "smooth_l1_loss_v2"

    Returns
    -------
    None
    """
    util.check_kernel_name(kernel_name)
    supported_dtypes = ("float16", "float32")

    # (dict the shape is read from, dict the dtype is read from)
    # NOTE(review): the third entry takes its shape from `label`, not from
    # `loss`, exactly as the original code does; this looks like a
    # copy-paste slip - confirm before changing, since `loss` may be
    # scalar-shaped for "mean"/"sum".
    sources = ((predict, predict), (label, label), (label, loss))

    shapes = []
    dtypes = []
    for shape_source, dtype_source in sources:
        shapes.append(shape_source.get("shape"))
        dtype = dtype_source.get("dtype").lower()
        util.check_dtype_rule(dtype, supported_dtypes)
        dtypes.append(dtype)

    for shape in shapes:
        util.check_shape_rule(shape)

    util.compare_tensor_dict_key(predict, label, "shape")

    # the dtype-rule helper is reused as a plain membership check
    reduction_type = reduction.lower()
    util.check_dtype_rule(reduction_type, ("none", "mean", "sum"))

    input_predict = tvm.placeholder(shapes[0], name="predict",
                                    dtype=dtypes[0])
    input_label = tvm.placeholder(shapes[1], name="label", dtype=dtypes[1])

    res = smooth_l1_loss_v2_compute(input_predict, input_label, sigma,
                                    reduction_type)

    # automatic scheduling for the CCE target
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    # build the operator
    build_config = {
        "name": kernel_name,
        "tensor_list": [input_predict, input_label, res]
    }
    te.lang.cce.cce_build_code(sch, build_config)

示例#19

0

显示文件

文件： bn_training_update_grad.py 项目： gekowa/ascend-opp

def bn_training_update_grad(grads,
                            x,
                            batch_mean,
                            batch_variance,
                            diff_scale,
                            diff_offset,
                            epsilon=0.0001,
                            kernel_name="bn_training_update_grad"):
    """
    algorithm: fused_batch_norm_grad_v2
    Validate the inputs of bn_training_update_grad, then schedule and
    build the kernel produced by `bn_training_update_grad_compute`.

    Parameters
    ----------
    grads: dict
        dict of grads, A 5D Tensor for input grads.
        dtype "float32" or "float16"; its "format" and "ori_format"
        entries select the shape handling below.
    x: dict
        dict of x, A 5D Tensor for input x.
        Must have the same dtype and shape as grads.
    batch_mean: dict
        dict of batch_mean, A 5D Tensor for input batch_mean ("float32").
    batch_variance: dict
        dict of batch_variance, A 5D Tensor for input batch_variance
        ("float32"). Must have the same shape as batch_mean.
    diff_scale: dict
        dict of diff_scale, A 5D Tensor for output diff_scale.
    diff_offset: dict
        dict of diff_offset, A 5D Tensor for output diff_offset.
    epsilon: float
        A small float number added to the variance of x. Defaults to `0.0001`.
    kernel_name: str
        kernel name, default value is "bn_training_update_grad"

    Returns
    -------
    None
    """
    shape_grads = grads.get("shape")
    shape_x = x.get("shape")
    mean_shape = batch_mean.get("shape")
    variance_shape = batch_variance.get("shape")

    raw_dtypes = (grads.get("dtype"), x.get("dtype"),
                  batch_mean.get("dtype"), batch_variance.get("dtype"))
    grads_dtype, x_dtype, mean_dtype, variance_dtype = (
        raw.lower() for raw in raw_dtypes)

    float_types = ("float32", "float16")
    check_dtype(grads_dtype, float_types, param_name="grads")
    check_dtype(x_dtype, float_types, param_name="x")
    check_dtype(mean_dtype, ("float32", ), param_name="batch_mean")
    check_dtype(variance_dtype, ("float32", ), param_name="batch_variance")
    util.compare_tensor_dict_key(grads, x, "dtype")

    data_format = grads.get("format")
    _check_format_nd(data_format, grads.get("ori_format"))

    if data_format != "NC1HWC0":
        # for other formats the statistics placeholders are shaped
        # [1, C, 1, 1], with C taken from axis 1 of x; both names share
        # one list object
        per_channel_shape = [1, shape_x[1], 1, 1]
        mean_shape = per_channel_shape
        variance_shape = per_channel_shape
    else:
        _check_shape(shape_grads, shape_x, mean_shape, variance_shape)

    util.compare_tensor_dict_key(grads, x, "shape")
    util.compare_tensor_dict_key(batch_mean, batch_variance, "shape")

    grads_input = tvm.placeholder(shape_grads, name="grads_input",
                                  dtype=grads_dtype)
    x_input = tvm.placeholder(shape_x, name="x_input", dtype=x_dtype)
    batch_mean_input = tvm.placeholder(mean_shape, name="batch_mean_input",
                                       dtype=mean_dtype)
    batch_variance_input = tvm.placeholder(variance_shape,
                                           name="batch_variance_input",
                                           dtype=variance_dtype)
    input_tensors = [
        grads_input, x_input, batch_mean_input, batch_variance_input
    ]

    res_list = bn_training_update_grad_compute(grads_input,
                                               x_input,
                                               batch_mean_input,
                                               batch_variance_input,
                                               diff_scale,
                                               diff_offset,
                                               epsilon,
                                               kernel_name=kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(res_list)

    build_config = {
        "name": kernel_name,
        "tensor_list": input_tensors + list(res_list)
    }
    te.lang.cce.cce_build_code(sch, build_config)

示例#20

0

显示文件

文件： sigmoid_cross_entropy_with_logits_grad_v2.py 项目： gekowa/ascend-opp

def sigmoid_cross_entropy_with_logits_grad_v2(
        predict,
        target,
        dout,
        weight,
        pos_weight,
        gradient,
        reduction="mean",
        kernel_name="sigmoid_cross_entropy_with_logits_grad_v2"):
    """
    Function: it measures the gradient of Binary Cross Entropy With Logits.
    Validates the inputs, then schedules and builds the kernel produced by
    `sigmoid_cross_entropy_with_logits_grad_v2_compute`.
    -----------
    :param predict: dict, shape and dtype of input (float16 or float32), required
    :param target: dict, shape and dtype of target, must match predict in shape and dtype, required
    :param dout: dict, shape and dtype of dout, must match predict in shape and dtype, required
    :param weight: dict, shape and dtype of weight, optional; handled by `optional_weight`
    :param pos_weight: dict, shape and dtype of pos_weight, optional; handled by `optional_weight`
    :param gradient: dict, shape and dtype of the output; not read by this function
    :param reduction: str, specifies the reduction mode: 'none' | 'mean' | 'sum', default to 'mean'
    :param kernel_name: str, kernel name, default to 'sigmoid_cross_entropy_with_logits_grad_v2'
    :return: None
    """
    # read shape and lower-cased dtype of each required input, in order
    shapes = []
    dtypes = []
    for tensor in (predict, target, dout):
        shapes.append(tensor.get("shape"))
        dtypes.append(tensor.get("dtype").lower())
    predict_shape, target_shape, dout_shape = shapes
    predict_dtype, target_dtype, dout_dtype = dtypes

    # target and dout are each compared with predict: shapes first, then dtypes
    for key in ("shape", "dtype"):
        for other in (target, dout):
            util.compare_tensor_dict_key(predict, other, key)

    dtype_list = ["float16", "float32"]
    op_utils.check_dtype(predict_dtype, dtype_list)
    op_utils.check_shape(predict_shape)

    if reduction not in ("none", "mean", "sum"):
        raise RuntimeError("reduction should be one of ['none','mean','sum']")

    util.check_kernel_name(kernel_name)

    predict_data = tvm.placeholder(predict_shape, predict_dtype,
                                   name="predict_data")
    target_data = tvm.placeholder(target_shape, target_dtype,
                                  name="target_data")
    dout_data = tvm.placeholder(dout_shape, dout_dtype, name="dout_data")

    # optional_weight receives this list and the weight descriptors
    # (presumably appending weight placeholders - verify against the helper)
    tensor_list = [predict_data, target_data, dout_data]
    weight_data, pos_weight_data = optional_weight(tensor_list, predict_shape,
                                                   dtype_list, weight,
                                                   pos_weight)

    res = sigmoid_cross_entropy_with_logits_grad_v2_compute(
        predict_data, target_data, dout_data, weight_data, pos_weight_data,
        reduction)
    tensor_list.append(res)

    with tvm.target.cce():
        schedule = generic.auto_schedule(res)

    build_config = {"name": kernel_name, "tensor_list": tensor_list}
    te.lang.cce.cce_build_code(schedule, build_config)

示例#21

0

显示文件

def bn_training_reduce_grad(grads, x, diff_scale, diff_offset, scale,
                            batch_mean, batch_variance, y, epsilon=0.0001,
                            kernel_name="bn_training_reduce_grad"):
    """
    algorithm: fused_batch_norm_grad_v2
    Validate the inputs of bn_training_reduce_grad, then schedule and
    build the kernel produced by `bn_training_reduce_grad_compute`.

    Parameters
    ----------
    grads: dict
        dict of grads, A 5D Tensor for input grads.
        source data type, support "float32", "float16".
        Its "format" and "ori_format" entries select the shape handling.
    x: dict
        dict of x, A 5D Tensor for input x.
        source data type, support "float32", "float16".
        Must have the same shape as grads.
    diff_scale: dict
        dict of diff_scale, A 5D Tensor for input diff_scale.
        The output of bn_training_update_grad.
        source data type, support "float32".
    diff_offset: dict
        dict of diff_offset, A 5D Tensor for input diff_offset.
        The output of bn_training_update_grad.
        source data type, support "float32".
    scale: dict
        dict of scale, A 5D Tensor for input scale.
        source data type, support "float32".
    batch_mean: dict
        dict of batch_mean, A 5D Tensor for input batch_mean.
        source data type, support "float32".
    batch_variance: dict
        dict of batch_variance, A 5D Tensor for input batch_variance.
        source data type, support "float32".
    y: dict
        dict of output, A `Tensor`. Has the same type as `grads`.
    epsilon: float
        A small float number added to the variance of x.
    kernel_name: str
        kernel name, default value is "bn_training_reduce_grad"

    Returns
    -------
    None
    """

    shape_grads = grads.get("shape")
    shape_x = x.get("shape")
    shape_diff_scale = diff_scale.get("shape")
    shape_diff_offset = diff_offset.get("shape")
    shape_scale = scale.get("shape")
    shape_batch_mean = batch_mean.get("shape")
    shape_batch_variance = batch_variance.get("shape")
    # grads and x must agree in shape; this is checked once, up front
    util.compare_tensor_dict_key(grads, x, "shape")

    input_grads_dtype = grads.get("dtype").lower()
    x_dtype = x.get("dtype").lower()
    diff_scale_dtype = diff_scale.get("dtype").lower()
    diff_offset_dtype = diff_offset.get("dtype").lower()
    scale_dtype = scale.get("dtype").lower()
    batch_mean_dtype = batch_mean.get("dtype").lower()
    batch_variance_dtype = batch_variance.get("dtype").lower()

    check_dtype(input_grads_dtype, ("float32", "float16"), param_name="grads")
    check_dtype(x_dtype, ("float32", "float16"), param_name="x")
    check_dtype(diff_scale_dtype, ("float32",), param_name="diff_scale")
    check_dtype(diff_offset_dtype, ("float32",), param_name="diff_offset")
    check_dtype(scale_dtype, ("float32",), param_name="scale")
    check_dtype(batch_mean_dtype, ("float32",), param_name="batch_mean")
    check_dtype(batch_variance_dtype, ("float32",), param_name="batch_variance")

    # every per-channel input must have the same shape as diff_scale
    for other in (diff_offset, scale, batch_mean, batch_variance):
        util.compare_tensor_dict_key(diff_scale, other, "shape")

    data_format = grads.get("format").upper()
    ori_format = grads.get("ori_format").upper()
    _check_format_nd(data_format, ori_format)

    if data_format == "NC1HWC0":
        _check_shape(shape_grads, shape_diff_scale)
    else:
        # for other formats the per-channel placeholders are shaped
        # [1, C, 1, 1], with C taken from axis 1 of x
        shape_list = [1, shape_x[1], 1, 1]
        shape_diff_scale = shape_list
        shape_diff_offset = shape_list
        shape_scale = shape_list
        shape_batch_mean = shape_list
        shape_batch_variance = shape_list

    grads_input = tvm.placeholder(shape_grads, name="grads_input",
                                  dtype=input_grads_dtype)
    x_input = tvm.placeholder(shape_x, name="x_input", dtype=x_dtype)
    diff_scale_input = tvm.placeholder(shape_diff_scale,
                                       name="diff_scale_input",
                                       dtype=diff_scale_dtype)
    diff_offset_input = tvm.placeholder(shape_diff_offset,
                                        name="diff_offset_input",
                                        dtype=diff_offset_dtype)
    scale_input = tvm.placeholder(shape_scale, name="scale_input",
                                  dtype=scale_dtype)
    batch_mean_input = tvm.placeholder(shape_batch_mean,
                                       name="batch_mean_input",
                                       dtype=batch_mean_dtype)
    batch_variance_input = tvm.placeholder(shape_batch_variance,
                                           name="batch_variance_input",
                                           dtype=batch_variance_dtype)

    res = bn_training_reduce_grad_compute(grads_input, x_input,
                                          diff_scale_input, diff_offset_input,
                                          scale_input, batch_mean_input,
                                          batch_variance_input, y, epsilon,
                                          kernel_name=kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(res)
    tensor_list = [grads_input, x_input, diff_scale_input, diff_offset_input,
                   scale_input, batch_mean_input, batch_variance_input, res]
    config = {"name": kernel_name,
              "tensor_list": tensor_list}
    te.lang.cce.cce_build_code(sch, config)

示例#22

0

显示文件

def fake_quant_with_min_max_args_gradient(gradients,
                                          x,
                                          y,
                                          min=-6,
                                          max=6,
                                          num_bits=8,
                                          narrow_range=False,
                                          kernel_name="fake_quant_"
                                          "with_min_max_args"):
    """
    Compute gradients for a FakeQuantWithMinMaxArgs operation.
    calculating data's :
    y = gradients*(if x>=nudged_min and <=nudged_max 1 else 0)

    This entry point validates the arguments, then schedules and builds
    the kernel produced by `fake_quant_with_min_max_args_gradient_compute`.

    Parameters
    ----------
    gradients:dict
              shape and dtype of input gradients,only support float32
    x: dict
        shape and dtype of input x,only support float32;
        must have the same shape and dtype as gradients
    y: dict
        the dict of output data
    min: scalar float int
        Defaults to -6
    max: scalar float int
        Defaults to 6
        [min; max] define the clamping range for the x data
    num_bits: float int
        Defaults to 8.num_bits is the bitwidth of the quantization,
        between 2 and 16
    narrow_range: bool
        True or False
        if True x values are quantized into the quantization range
        [1; 2^num_bits - 1]
        if False x values are quantized into the quantization range
        [0; 2^num_bits - 1]
    kernel_name: str
        cce kernel name, default value is
        "fake_quant_with_min_max_args"

    Returns
    -------
    None

    Raises
    ------
    RuntimeError
        if the two input shapes differ, if min >= max, or if num_bits is
        outside [2, 16]
    """
    shape_gradients = gradients.get("shape")
    shape_x = x.get("shape")
    shapes_match = shape_gradients == shape_x
    if not shapes_match \
            and isinstance(shape_gradients, (list, tuple)) \
            and isinstance(shape_x, (list, tuple)):
        # a list never compares equal to a tuple, even with identical
        # items, so compare the dimensions themselves
        shapes_match = list(shape_gradients) == list(shape_x)
    if not shapes_match:
        raise RuntimeError("shape of two input must be same")
    util.compare_tensor_dict_key(gradients, x, "dtype")

    check_shape(shape_x, param_name="x")
    input_dtype = x.get("dtype").lower()
    check_dtype(input_dtype, ["float32"], param_name="x")
    if min >= max:
        raise RuntimeError("min must be less than max")
    if num_bits < 2 or num_bits > 16:
        raise RuntimeError("num_bits is between 2 and 16")

    # both inputs are flattened to one dimension holding the product of
    # x's dims
    flat_shape = (functools_reduce(lambda acc, dim: acc * dim, shape_x[:]), )
    gradients_input = tvm.placeholder(flat_shape, name="gradients",
                                      dtype=input_dtype)
    x_input = tvm.placeholder(flat_shape, name="x", dtype=input_dtype)
    res = fake_quant_with_min_max_args_gradient_compute(
        gradients_input, x_input, y, float(min), float(max), num_bits,
        narrow_range, kernel_name)
    with tvm.target.cce():
        auto_sch = generic.auto_schedule(res)

    config = {"name": kernel_name,
              "tensor_list": [gradients_input, x_input, res]}
    te.lang.cce.cce_build_code(auto_sch, config)

示例#23

0

显示文件

文件： batch_norm_grad_ext2.py 项目： gekowa/ascend-opp

def batch_norm_grad_ext2(y_backprop,
                         x,
                         scale,
                         reserve_space_1,
                         reserve_space_2,
                         x_backprop,
                         scale_backprop,
                         offset_backprop,
                         reserve_space_3,
                         reserve_space_4,
                         epsilon=0.0001,
                         data_format="NHWC",
                         is_training=True,
                         kernel_name="batch_norm_grad_ext2"):
    """
    algorithm: batch_norm_grad_ext2
    Batch normalization grad. Validates the inputs, then schedules and
    builds the kernel produced by `batch_norm_grad_ext2_compute`.

    Parameters
    ----------
    y_backprop: dict
        dict of y_backprop.
        source data type, support "float16", "float32".
        A 2D shape is extended with two trailing 1 dims.
    x: dict
        dict of x.
        source data type, support "float16", "float32".
        Must match y_backprop in dtype and shape; a 2D shape is extended
        with two trailing 1 dims. Its "format" entry drives the shape checks.
    scale: dict
        dict of scale.
        source data type, support "float32".
    reserve_space_1: dict
        dict of reserve_space_1.
        source data type, support "float32".
        When is_training is True, a Tensor for the computed batch
        mean to be reused in gradient computation. When is_training is
        False, a Tensor for the population mean to be reused in both
        1st and 2nd order gradient computation.
    reserve_space_2: dict
        dict of reserve_space_2.
        source data type, support "float32".
        When is_training is True, a Tensor for the computed batch
        variance (inverted variance in the cuDNN case) to be reused in
        gradient computation. When is_training is False, a Tensor
        for the population variance to be reused in both 1st and 2nd
        order gradient computation.
    x_backprop: dict
        dict of output. Has the same type as `y_backprop`.
    scale_backprop: dict
        dict of scale_backprop. Has the same type as `reserve_space_1`.
    offset_backprop: dict
        dict of offset_backprop. Has the same type as `reserve_space_1`.
    reserve_space_3: dict
        dict of reserve_space_3.
    reserve_space_4: dict
        dict of reserve_space_4.
    epsilon: float
        A small float number added to the variance of x. Defaults to `0.0001`.
    data_format: str
        An optional `string` from: `"NHWC", "NCHW"`. Defaults to `"NHWC"`.
    is_training: bool
        An optional `bool`. Defaults to `True`.
        A bool value to indicate the operation is for training (default)
        or inference.
    kernel_name: str
        kernel name, default value is "batch_norm_grad_ext2"

    Returns
    -------
    None
    """
    # 2D shapes get two trailing 1 dims appended
    y_backprop_shape = y_backprop.get("shape")
    y_backprop_shape = (list(y_backprop_shape) + [1, 1]
                        if len(y_backprop_shape) == 2 else y_backprop_shape)
    x_shape = x.get("shape")
    x_shape = list(x_shape) + [1, 1] if len(x_shape) == 2 else x_shape
    scale_shape = scale.get("shape")
    mean_shape = reserve_space_1.get("shape")
    variance_shape = reserve_space_2.get("shape")

    raw_dtypes = (y_backprop.get("dtype"), x.get("dtype"),
                  scale.get("dtype"), reserve_space_1.get("dtype"),
                  reserve_space_2.get("dtype"))
    (y_backprop_dtype, x_dtype, scale_dtype, reserve_space_1_dtype,
     reserve_space_2_dtype) = (raw.lower() for raw in raw_dtypes)

    float_types = ("float32", "float16")
    check_dtype(y_backprop_dtype, float_types, param_name="y_backprop")
    check_dtype(x_dtype, float_types, param_name="x")
    check_dtype(scale_dtype, ("float32", ), param_name="scale")
    check_dtype(reserve_space_1_dtype, ("float32", ),
                param_name="reserve_space_1")
    check_dtype(reserve_space_2_dtype, ("float32", ),
                param_name="reserve_space_2")
    util.compare_tensor_dict_key(y_backprop, x, "dtype")

    _format_check(x, data_format)
    format_data = x.get("format")

    shape_args = (y_backprop_shape, x_shape, scale_shape, mean_shape,
                  variance_shape, format_data)
    _check_shape_len(*shape_args)
    _check_shape(*shape_args)
    util.compare_tensor_dict_key(y_backprop, x, "shape")
    for other in (reserve_space_1, reserve_space_2):
        util.compare_tensor_dict_key(scale, other, "shape")

    # _change_shape returns the shapes used for the three per-channel
    # placeholders, keyed by "<name>_change"
    changed_shapes = _change_shape(scale_shape, mean_shape, variance_shape,
                                   format_data)

    y_backprop_input = tvm.placeholder(y_backprop_shape,
                                       name="y_backprop",
                                       dtype=y_backprop_dtype)
    x_input = tvm.placeholder(x_shape, name="x", dtype=x_dtype)
    scale_input = tvm.placeholder(changed_shapes.get("shape_scale_change"),
                                  name="scale",
                                  dtype=scale_dtype)
    reserve_space_1_input = tvm.placeholder(
        changed_shapes.get("shape_reserve_space_1_change"),
        name="reserve_space_1",
        dtype=reserve_space_1_dtype)
    reserve_space_2_input = tvm.placeholder(
        changed_shapes.get("shape_reserve_space_2_change"),
        name="reserve_space_2",
        dtype=reserve_space_2_dtype)
    input_tensors = [
        y_backprop_input, x_input, scale_input, reserve_space_1_input,
        reserve_space_2_input
    ]

    res_list = batch_norm_grad_ext2_compute(y_backprop_input,
                                            x_input,
                                            scale_input,
                                            reserve_space_1_input,
                                            reserve_space_2_input,
                                            x_backprop,
                                            scale_backprop,
                                            offset_backprop,
                                            reserve_space_3,
                                            reserve_space_4,
                                            epsilon,
                                            data_format,
                                            is_training,
                                            kernel_name=kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(res_list)

    build_config = {
        "name": kernel_name,
        "tensor_list": input_tensors + list(res_list)
    }
    te.lang.cce.cce_build_code(sch, build_config)

示例#24

0

显示文件

def axpy_v2(x1, x2, alpha, y, kernel_name="axpy_v2"):
    """
    Validate the operator inputs, then schedule and build the kernel
    produced by `axpy_v2_compute`.

    Parameters
    ----------
    x1 : dict
        shape and dtype of input_x ("float16", "float32" or "int32")
    x2 : dict
        shape and dtype of input_y; must have the same dtype as x1
    alpha : dict
        shape and dtype of alpha ("float16" or "float32")
        scalar apply to input_y:input_y*alpha
    y : dict
        shape and dtype of output, should be same shape and type as input

    kernel_name : str
        kernel name, default value is "axpy_v2"

    Returns
    -------
    None
    """
    util.check_kernel_name(kernel_name)

    # the input shapes are derived from the detected format pattern
    pattern = _add_check_format(x1, x2)
    shape_x1, shape_x2 = _infer_shape(pattern, x1, x2)

    dtype_x1 = x1.get("dtype").lower()
    dtype_x2 = x2.get("dtype").lower()
    alpha_dtype = alpha.get("dtype").lower()
    alpha_shape = alpha.get("shape")

    # shape validation
    shape_x1 = util.scalar2tensor_one(shape_x1)
    shape_x2 = util.scalar2tensor_one(shape_x2)
    alpha_shape = util.scalar2tensor_one(alpha_shape)
    for shape in (shape_x1, shape_x2, alpha_shape):
        op_utils.check_shape(shape)

    # dtype validation: alpha accepts a narrower set than x1/x2
    data_dtypes = ("float16", "float32", "int32")
    alpha_dtypes = ("float16", "float32")
    check_dtype(dtype_x1, data_dtypes)
    check_dtype(dtype_x2, data_dtypes)
    check_dtype(alpha_dtype, alpha_dtypes)
    util.compare_tensor_dict_key(x1, x2, "dtype")

    # alpha must be a 0D or 1D tensor
    if len(alpha_shape) != 0 and not util.is_scalar(alpha_shape):
        raise RuntimeError("alpha should be 0D or 1D tensor")

    shape_x1, shape_x2, shape_max = util.produce_shapes(shape_x1, shape_x2)
    if shape_x1[-1] == 1 and shape_x2[-1] == 1 and shape_max[-1] == 1:
        # drop a shared trailing axis of size 1, unless it is the only axis
        shape_x1, shape_x2, shape_max = (
            dims if len(dims) == 1 else dims[:-1]
            for dims in (shape_x1, shape_x2, shape_max))

    util.check_shape_size(shape_max, SHAPE_SIZE_LIMIT)
    # called for validation only; the returned shapes are not used
    util.produce_shapes(shape_max, alpha_shape)

    shape_x1, shape_x2 = refine_shapes_for_broadcast(shape_x1, shape_x2)

    data_input_x1 = tvm.placeholder(shape_x1, name="data_input_x1",
                                    dtype=dtype_x1)
    data_input_x2 = tvm.placeholder(shape_x2, name="data_input_x2",
                                    dtype=dtype_x2)

    # left-pad alpha's shape with 1s up to the rank of x1
    leading_ones = (1, ) * (len(shape_x1) - len(alpha_shape))
    alpha_shape = leading_ones + tuple(alpha_shape)
    alpha_input = tvm.placeholder(alpha_shape, name="alpha_input",
                                  dtype=alpha_dtype)

    res = axpy_v2_compute(data_input_x1, data_input_x2, alpha_input, y,
                          kernel_name)

    with tvm.target.cce():
        schedule = generic.auto_schedule(res)

    build_config = {
        "print_ir": False,
        "name": kernel_name,
        "tensor_list": [data_input_x1, data_input_x2, alpha_input, res]
    }
    te.lang.cce.cce_build_code(schedule, build_config)

示例#25

0

显示文件

def softmax_cross_entropy_with_logits(
        input_features,
        input_labels,
        output_loss,
        output_backprop,
        kernel_name="softmax_cross_entropy_with_logits"):
    """
    Computes softmax cross entropy cost.

    Validates both inputs, selects the compute function (a dedicated one
    for 4D inputs, the generic one otherwise), then schedules and builds
    the kernel.

    Parameters
    ----------
    input_features: dict
        input tensor contains shape and dtype attributes.
        source data type support "float16", "float32"
        (4D inputs are only accepted as "float32").
    input_labels: dict
        input tensor contains shape and dtype attributes.
        Must have the same type as 'input_features'.
    output_loss: dict
        data of output, forwarded to the compute function.
        Must have the same type as 'input_features'.
    output_backprop: dict
        data of output, forwarded to the compute function.
        Must have the same type as 'input_features'.
    kernel_name: str
        kernel name, default value is "softmax_cross_entropy_with_logits"

    Returns
    -------
    None

    Raises
    ------
    RuntimeError
        If the features are 4D and the labels are not, or the dtype is
        not "float32" in the 4D case; if both inputs are rank 1; or if
        either input has more than 2 dimensions in the non-4D case.
    """
    shape_features = input_features.get("shape")
    shape_labels = input_labels.get("shape")

    util.compare_tensor_dict_key(input_features, input_labels, "dtype")
    check_shape(shape_features, param_name="input_features")
    check_shape(shape_labels, param_name="input_labels")

    check_list = ("float16", "float32")
    input_dtype = input_features.get("dtype").lower()
    check_dtype(input_dtype, check_list, param_name="input_features")

    if len(shape_features) == 4:
        if len(shape_features) != len(shape_labels):
            raise RuntimeError("The length of two inputs must be same")
        if input_dtype != "float32":
            raise RuntimeError("Not supported dtype!")
        compute = softmax_cross_entropy_with_logits_nchw_compute
    else:
        if len(shape_features) == 1 and len(shape_labels) == 1:
            raise RuntimeError(
                "The rank of two inputs can not be 1 at the same time")
        if len(shape_features) > 2 or len(shape_labels) > 2:
            raise RuntimeError(
                "logits and labels must be either 2-dimensional, "
                "or broadcasted to 2-dimensional")
        if len(shape_features) == 1 or len(shape_labels) == 1:
            # Only the two broadcast input shapes are needed; the combined
            # shape returned as the third element is not used here.
            shape_features, shape_labels, _ = broadcast_shapes(
                shape_features, shape_labels,
                param_name_input1="input_features",
                param_name_input2="input_labels")
        compute = softmax_cross_entropy_with_logits_compute

    # Both paths feed identically named placeholders of the features' dtype.
    data_features = tvm.placeholder(shape_features,
                                    dtype=input_dtype,
                                    name="data_features")
    data_labels = tvm.placeholder(shape_labels,
                                  dtype=input_dtype,
                                  name="data_labels")
    res = compute(data_features, data_labels, output_loss, output_backprop)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    # res is iterable: every produced tensor is appended after the inputs.
    tensor_list = [data_features, data_labels] + list(res)

    config = {"name": kernel_name, "tensor_list": tensor_list}
    te.lang.cce.cce_build_code(sch, config)

示例#26

0

显示文件

def smooth_l1_loss(predict,
                   label,
                   loss,
                   sigma=1.0,
                   kernel_name="smooth_l1_loss"):
    """
    Validate the inputs and build the smooth_l1_loss kernel.

    Parameters
    ----------
    predict : dict
        shape and dtype of input, dtype must be "float16" or "float32"
    label : dict
        shape and dtype of input, must have the same shape as predict,
        dtype must be "float16" or "float32"
    loss : dict
        shape and dtype of output; only its dtype is validated here
        ("float16" or "float32")
    sigma: float
        sigma, default value is 1; forwarded to smooth_l1_loss_compute
    kernel_name : str
        kernel name, default value is "smooth_l1_loss"

    Returns
    -------
    None
    """

    check_list = ("float16", "float32")
    shape_predict = predict.get("shape")
    input_predict_dtype = predict.get("dtype").lower()
    check_dtype(input_predict_dtype, check_list, param_name="predict")

    shape_label = label.get("shape")
    input_label_dtype = label.get("dtype").lower()
    dtype_loss = loss.get("dtype").lower()
    check_dtype(input_label_dtype, check_list, param_name="label")
    check_dtype(dtype_loss, check_list, param_name="loss")

    # predict's dtype has already been validated above, so it is not
    # checked a second time after the shape checks.
    util.compare_tensor_dict_key(predict, label, "shape")
    check_shape(shape_predict, param_name="predict")
    check_shape(shape_label, param_name="label")

    shape_predict, shape_label = \
        refine_shapes_for_broadcast(shape_predict, shape_label)
    input_predict = tvm.placeholder(shape_predict,
                                    name="predict",
                                    dtype=input_predict_dtype)
    input_label = tvm.placeholder(shape_label,
                                  name="label",
                                  dtype=input_label_dtype)
    res = smooth_l1_loss_compute(input_predict, input_label, loss, sigma,
                                 kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "tensor_list": [input_predict, input_label, res]
    }

    te.lang.cce.cce_build_code(sch, config)

示例#27

0

显示文件

文件： relu_grad.py 项目： gekowa/ascend-opp

def relu_grad(input_gradients,
              input_features,
              output_backprops,
              kernel_name="relu_grad"):
    """
    Build the kernel for the backpropagation of the relu operation:
    output_backprops = input_gradients*1(input_features>0) or 0(input_features<=0).
    Supported dtypes: float16, float32, int32, int8, uint8.

    Parameters
    ----------
    input_gradients: dict
        the backpropagated gradients to the corresponding relu operation
    input_features: dict
        the features passed as output of relu operation;
        must have the same dtype as input_gradients
    output_backprops: dict
        the output of relu back propagation, forwarded to relu_grad_compute
    kernel_name: str
        cce kernel name, default value is "relu_grad"

    Returns
    -------
    None
    """
    grad_shape = input_gradients.get("shape")
    feature_shape = input_features.get("shape")

    util.compare_tensor_dict_key(input_gradients, input_features, "dtype")
    check_shape(grad_shape, param_name="input_gradients")
    check_shape(feature_shape, param_name="input_features")

    # Broadcasting is only attempted when the two shapes are not already equal.
    shapes_match = list(grad_shape) == list(feature_shape)
    if not shapes_match:
        grad_shape, feature_shape, _ = broadcast_shapes(
            grad_shape, feature_shape,
            param_name_input1="input_gradients",
            param_name_input2="input_features")

    grad_dtype = input_gradients.get("dtype").lower()
    feature_dtype = input_features.get("dtype").lower()

    supported_dtypes = ("float16", "float32", "int32", "int8", "uint8")
    check_dtype(grad_dtype, supported_dtypes, param_name="input_gradients")
    check_dtype(feature_dtype, supported_dtypes, param_name="input_features")

    grad_shape, feature_shape = refine_shapes_for_broadcast(grad_shape,
                                                            feature_shape)
    grad_tensor = tvm.placeholder(grad_shape,
                                  name="data_input_gradients",
                                  dtype=grad_dtype)
    feature_tensor = tvm.placeholder(feature_shape,
                                     name="data_input_features",
                                     dtype=feature_dtype)

    res = relu_grad_compute(grad_tensor, feature_tensor,
                            output_backprops, kernel_name)
    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    build_config_dict = {
        "name": kernel_name,
        "tensor_list": [grad_tensor, feature_tensor, res],
    }
    te.lang.cce.cce_build_code(sch, build_config_dict)

示例#28

0

显示文件

文件： l1_loss_grad.py 项目： gekowa/ascend-opp

def l1_loss_grad(grads,
                 predict,
                 label,
                 y,
                 reduction="mean",
                 kernel_name="l1_loss_grad"):
    """
    Validate the inputs and build the l1_loss_grad kernel.

    Parameters
    ----------
    grads : dict
        shape and dtype of grad_out as input
    predict : dict
        shape and dtype of predict as input, should be same shape and type as grads
    label : dict
        shape and dtype of label as input, should be same shape and type as grads
    y : dict
        shape and dtype of output, should be same shape and type as grads
    reduction: string
        reduction name, one of "none", "mean", "sum"; default value is "mean"
    kernel_name : str
        kernel name, default value is "l1_loss_grad"

    Returns
    -------
    None

    Raises
    ------
    RuntimeError
        If reduction is not one of "none", "mean" or "sum".
    """
    dtype_list = ["float16", "float32"]
    reduction_list = ["none", "mean", "sum"]
    grads_data_type = grads.get("dtype").lower()
    grads_shape = grads.get("shape")
    predict_data_type = predict.get("dtype").lower()
    predict_shape = predict.get("shape")
    label_data_type = label.get("dtype").lower()
    label_shape = label.get("shape")

    op_utils.check_dtype(grads_data_type, dtype_list)
    op_utils.check_dtype(predict_data_type, dtype_list)
    op_utils.check_dtype(label_data_type, dtype_list)

    op_utils.check_shape(grads_shape)
    op_utils.check_shape(predict_shape)
    op_utils.check_shape(label_shape)

    util.compare_tensor_dict_key(grads, predict, "shape")
    util.compare_tensor_dict_key(grads, label, "shape")
    util.compare_tensor_dict_key(grads, predict, "dtype")
    util.compare_tensor_dict_key(grads, label, "dtype")

    if reduction not in reduction_list:
        raise RuntimeError("reduction should be one of ['none','mean','sum']")

    # Use dedicated names for the placeholders so the input dicts are not
    # shadowed by tensors of the same name.
    grads_input = tvm.placeholder(grads_shape,
                                  dtype=grads_data_type,
                                  name="grads")
    predict_input = tvm.placeholder(predict_shape,
                                    dtype=predict_data_type,
                                    name="predict")
    label_input = tvm.placeholder(label_shape,
                                  dtype=label_data_type,
                                  name="label")

    # Forward the caller-supplied kernel_name rather than a hard-coded one.
    res = l1_loss_grad_compute(grads_input,
                               predict_input,
                               label_input,
                               y,
                               reduction=reduction,
                               kernel_name=kernel_name)

    with tvm.target.cce():
        schedule = generic.auto_schedule(res)

    config = {"name": kernel_name,
              "tensor_list": [grads_input, predict_input, label_input, res]}
    te.lang.cce.cce_build_code(schedule, config)

示例#29

0

显示文件

def histogram_fixed_width_d(x,
                            range,
                            y,
                            nbins,
                            dtype="int32",
                            kernel_name='histogram_fixed_width_d'):
    """
    Build the kernel that returns a rank 1 histogram counting the number
    of entries in `x` that fall into every bin. The bins are equal width
    and determined by the arguments `range` and `nbins`.

    Parameters
    ----------
    x: dict
        dict info of input value, must include the keys(shape and dtype).
        dtype must be "float16", "float32" or "int32".
    range: dict
        dict info of input value_range, must include the keys(shape and dtype).
        the shape must be (2,) or [2]; dtype must equal the dtype of x.
    y: dict
        dict info of output, forwarded to histogram_fixed_width_d_compute
    nbins: int
        number of histogram bins, must be > 0.
    dtype: str
        data type for returned histogram.
        (not referenced inside this function's body)
    kernel_name: str
        cce kernel name, default value is "histogram_fixed_width_d"

    Returns
    -------
    None

    Raises
    ------
    RuntimeError
        If the range tensor does not hold exactly 2 elements, or nbins <= 0.
    """
    values_shape = x.get("shape")
    range_shape = range.get("shape")
    values_dtype = x.get("dtype").lower()

    check_shape(values_shape, param_name="x")
    check_shape(range_shape, param_name="range")
    util.compare_tensor_dict_key(x, range, "dtype")

    # Both inputs are handed to the compute function as 1-D tensors whose
    # length is the value returned by check_tensor_shape_size.
    values_size = util.check_tensor_shape_size(list(values_shape))
    range_size = util.check_tensor_shape_size(list(range_shape))

    check_dtype(values_dtype, ("float16", "float32", "int32"), param_name="x")

    if range_size != 2:
        raise RuntimeError("the shape of range must be (2,) or [2]")

    if nbins <= 0:
        raise RuntimeError("the nbins must be > 0")

    data = tvm.placeholder([values_size],
                           name="input_data",
                           dtype=values_dtype)
    # The range placeholder reuses x's dtype (the two were compared above).
    range_data = tvm.placeholder([range_size],
                                 name="input_range_data",
                                 dtype=values_dtype)

    res = histogram_fixed_width_d_compute(
        data, range_data, y, nbins, kernel_name)
    sch = tvm.create_schedule(res.op)
    with build_config:
        tvm.build(sch, [data, range_data, res], "cce", name=kernel_name)

示例#30

0

显示文件

def mse_loss_grad(predict,
                  label,
                  dout,
                  grad,
                  reduction="mean",
                  kernel_name="mse_loss_grad"):
    """
    Validate the inputs and build the mse_loss_grad kernel.

    Parameters
    ----------
    predict : dict
        shape and dtype of input, dtype must be "float16" or "float32"
    label : dict
        shape and dtype of input, must be same shape and type as predict
    dout : dict
        shape and dtype of input, must be same shape and type as predict
    grad : dict
        shape and dtype of output, forwarded to mse_loss_grad_compute
    reduction : str
        reduce mode,can be 'mean','sum' or 'none';
        forwarded to mse_loss_grad_compute without validation here
    kernel_name : str
        kernel name, default value is "mse_loss_grad"

    Returns
    -------
    None
    """

    predict_shape = predict.get("shape")
    label_shape = label.get("shape")
    dout_shape = dout.get("shape")

    input_dtype = predict.get("dtype").lower()
    label_dtype = label.get("dtype").lower()
    dout_dtype = dout.get("dtype").lower()

    # label and dout must agree with predict, first on shape, then on dtype.
    for key in ("shape", "dtype"):
        for other in (label, dout):
            util.compare_tensor_dict_key(predict, other, key)

    supported_dtypes = ("float16", "float32")
    for one_dtype in (input_dtype, label_dtype, dout_dtype):
        op_utils.check_dtype(one_dtype, supported_dtypes)

    for one_shape in (predict_shape, label_shape, dout_shape):
        op_utils.check_shape(one_shape)

    util.check_kernel_name(kernel_name)

    # All three placeholders are created with predict's dtype.
    predict_input = tvm.placeholder(
        predict_shape, name="predict_input", dtype=input_dtype)
    label_input = tvm.placeholder(
        label_shape, name="label_input", dtype=input_dtype)
    dout_input = tvm.placeholder(
        dout_shape, name="dout_input", dtype=input_dtype)

    res = mse_loss_grad_compute(
        predict_input, label_input, dout_input, grad, reduction, kernel_name)

    with tvm.target.cce():
        schedule = generic.auto_schedule(res)

    tensors = [predict_input, label_input, dout_input, res]
    config = {"name": kernel_name, "tensor_list": tensors}

    te.lang.cce.cce_build_code(schedule, config)