Example #1
def elu_grad(grads, activations, y, kernel_name="elu_grad"):
    """
    do element-wise elu_grad operation

    Parameters:
    ----------
    grads: the dict of gradient input, only support float16, float32

    activations: the dict of activation input, only support float16, float32

    y : the dict of output

    kernel_name : cce kernel name, default value is "elu_grad"

    Returns
    -------
    None
    """

    shape_gradient = grads.get("shape")
    shape_activation = activations.get("shape")
    dtype_gradient = grads.get("dtype")
    dtype_activation = activations.get("dtype")

    check_shape(shape_gradient, param_name="grads")
    check_shape(shape_activation, param_name="activations")
    if not operator.eq(shape_gradient, shape_activation):
        raise RuntimeError("all input shape must be equal")
    shape_gradient, _ = refine_shape_axes(shape_gradient, [])
    shape_activation, _ = refine_shape_axes(shape_activation, [])

    check_list = ("float16", "float32")
    check_dtype(dtype_gradient, check_list, param_name="grads")
    check_dtype(dtype_activation, check_list, param_name="activations")
    if dtype_gradient.lower() != dtype_activation.lower():
        raise RuntimeError("all input dtype must be same")

    dtype = dtype_gradient.lower()
    data_gradient = tvm.placeholder(shape_gradient,
                                    dtype=dtype,
                                    name="data_gradient")
    data_activation = tvm.placeholder(shape_activation,
                                      dtype=dtype,
                                      name="data_activation")
    res = elu_grad_compute(data_gradient, data_activation, y, kernel_name)

    with tvm.target.cce():
        auto_sch = topi.generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "print_ir": False,
        "tensor_list": [data_gradient, data_activation, res]
    }
    te.lang.cce.cce_build_code(auto_sch, config)
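The entry points in these examples all share the same calling convention: each tensor argument is a plain dict carrying at least "shape" and "dtype". Below is a minimal, hypothetical invocation sketch; the shapes, dtypes and kernel name are made up, and it assumes the Ascend TBE build environment (te, tvm, topi) is importable.

grads = {"shape": (16, 1024), "dtype": "float16"}
activations = {"shape": (16, 1024), "dtype": "float16"}
y = {"shape": (16, 1024), "dtype": "float16"}

# Builds a CCE kernel for these shapes/dtypes under the given kernel name.
elu_grad(grads, activations, y, kernel_name="elu_grad_fp16")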
Example #2
def acos_grad(y, dy, z, kernel_name="acos_grad"):
    """
    do element-wise acos_grad operation between two input tensors

    Parameters:
    ----------
    y : dict of y, include shape and dtype, dtype support float16, float32

    dy : dict of dy, include shape and dtype, dtype support float16, float32

    z : dict of z, include shape and dtype, dtype support float16, float32

    kernel_name : cce kernel name, default value is "acos_grad"

    Returns
    -------
    None
    """

    # get the shape and dtype for input_1,input_2
    shape_y = y.get("shape")
    shape_dy = dy.get("shape")
    dtype = y.get("dtype")
    dtype1 = dy.get("dtype")

    check_shape(shape_y, param_name="y")
    check_shape(shape_dy, param_name="dy")
    shape_y, _ = refine_shape_axes(shape_y, [])
    shape_dy, _ = refine_shape_axes(shape_dy, [])

    # raise RuntimeError if the input parameters are invalid
    check_list = ("float16", "float32")
    check_dtype(dtype, check_list, param_name="y")
    check_dtype(dtype1, check_list, param_name="dy")
    dtype = dtype.lower()
    dtype1 = dtype1.lower()
    if not operator.eq(shape_y, shape_dy):
        raise RuntimeError(
            "acos_grad only support input shape while input_shape1 equals"
            " to input_shape2")
    if dtype != dtype1:
        raise RuntimeError(
            "acos_grad only support dtype while input_dtype1 equals"
            " to input_dtype2")

    data_y = tvm.placeholder(shape_y, dtype=dtype, name="data1")
    data_dy = tvm.placeholder(shape_dy, dtype=dtype, name="data2")

    res = acos_grad_compute(data_y, data_dy, z, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": (data_y, data_dy, res)}
    te.lang.cce.cce_build_code(sch, config)
Example #3
def asin_grad(y, dy, z, kernel_name="asin_grad"):
    """
    do element-wise asin_grad operation between two input tensors

    Parameters:
    ----------
    y : dict of y, include shape and dtype, dtype support float16, float32

    dy : dict of dy, include shape and dtype, dtype support float16, float32

    z : dict of output

    kernel_name : cce kernel name, default value is "asin_grad"

    Returns
    -------
    None
    """

    # get the shape and dtype
    shape_y = y.get("shape")
    shape_dy = dy.get("shape")
    dtype_y = y.get("dtype")
    dtype_dy = dy.get("dtype")

    # kernel name check: should be unique

    # check whether the shape is right
    check_shape(shape_y, param_name="y")
    check_shape(shape_dy, param_name="dy")
    if not operator.eq(shape_y, shape_dy):
        raise RuntimeError("all input shape must be the same")
    shape_y, _ = refine_shape_axes(shape_y, [])
    shape_dy, _ = refine_shape_axes(shape_dy, [])

    # check whether dtypes are fp16,fp32 and whether they are the same
    check_list = ("float16", "float32")
    check_dtype(dtype_y, check_list, param_name="y")
    check_dtype(dtype_dy, check_list, param_name="dy")
    dtype_y = dtype_y.lower()
    if dtype_y != dtype_dy.lower():
        raise RuntimeError("all input dtype must be same")

    # get 2 input tensors: data_y, data_dy
    data_y = tvm.placeholder(shape_y, name="data_y", dtype=dtype_y)
    data_dy = tvm.placeholder(shape_dy, name="data_dy", dtype=dtype_y)
    res = asin_grad_compute(data_y, data_dy, z, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name, "tensor_list": [data_y, data_dy, res]}
    te.lang.cce.cce_build_code(sch, config)
Example #4
def atan_grad(y, dy, z, kernel_name="atan_grad"):
    """
    Gradient calculation for atan(x)

    Parameters:
    ----------
    y : dict of y, include shape and dtype, dtype support float16, float32
    dy : dict of dy, include shape and dtype, dtype support float16, float32
    z : dict of output, include shape and dtype
    kernel_name : cce kernel name, default value is atan_grad

    Algorithm :
    ----------
    forward :
        y = atan(x)
    backward gradient :
        de/dx = dy/dx*de/dy = 1/(1+x^2)*grad

    Returns
    ----------
    None
    """

    # get the shape and dtype
    shape = y.get("shape")
    shape_grad = dy.get("shape")
    dtype = y.get("dtype")
    dtype_grad = dy.get("dtype")

    # check whether kernel name is unique

    # check whether the shape is right
    check_shape(shape, param_name="y")
    check_shape(shape_grad, param_name="dy")
    if not operator.eq(shape, shape_grad):
        raise RuntimeError("all input shape must be the same")
    shape, _ = refine_shape_axes(shape, [])

    # check whether dtypes are fp16,fp32 and whether they are the same
    check_list = ("float16", "float32")
    check_dtype(dtype, check_list, param_name="y")
    check_dtype(dtype_grad, check_list, param_name="dy")
    dtype = dtype.lower()
    if dtype != dtype_grad.lower():
        raise RuntimeError("all input dtype must be same")

    # get 2 input placeholders: data_input, grad
    data_input = tvm.placeholder(shape, name="input_data", dtype=dtype)
    grad = tvm.placeholder(shape, name="input_grad", dtype=dtype)

    # compute the backward gradient
    res = atan_grad_compute(data_input, grad, z, kernel_name)

    with tvm.target.cce():
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name,
              "tensor_list": [data_input, grad, res]}
    te.lang.cce.cce_build_code(sch, config)
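The backward formula stated in the docstring, de/dx = grad / (1 + x^2), can be restated in plain NumPy for a host-side sanity check. This is only a reference sketch of the math, not the kernel's compute path.

import numpy as np

def atan_grad_reference(x, grad):
    # de/dx = grad / (1 + x^2), as stated in the docstring above
    return grad / (1.0 + np.square(x))

x = np.array([-2.0, 0.0, 0.5], dtype=np.float32)
grad = np.ones_like(x)
print(atan_grad_reference(x, grad))  # [0.2 1.  0.8]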
Example #5
def data_format_dim_map(x,
                        y,
                        src_format="NHWC",
                        dst_format="NCHW",
                        kernel_name="data_format_dim_map"):
    """
    Returns the dimension index in the destination data format given the one
    in the source data format.

    Parameters
    ----------
    x : A Tensor with each element as a dimension index in source data format.
        Must be the following types: `int32`. Must be in the range [-4, 4).
    y : Shape and dtype of y, reserved parameter, not used now.
    src_format : An optional `string`. Defaults to `"NHWC"`. source data format.
    dst_format : An optional `string`. Defaults to `"NCHW"`. destination data format.
    kernel_name : CCE kernel name, default value is "data_format_dim_map" (optional).

    Returns
    -------
    None
    """

    shape_input = x.get("shape")
    dtype_input = x.get("dtype")

    # check kernel name, shape, size, dtype
    check_shape(shape_input, param_name="x")
    shape_input, _ = refine_shape_axes(shape_input, [])
    check_list = ("int32", )
    dtype_input = dtype_input.lower()
    check_dtype(dtype_input, check_list, param_name="x")

    # check length of format
    if len(src_format) != 4:
        raise ValueError(
            "source format must of length 4, received src_format = %s" %
            src_format)

    if len(dst_format) != 4:
        raise ValueError(
            "destination format must of length 4, received dst_format = %s" %
            dst_format)
    # get data and compute
    data_input = tvm.placeholder(shape_input,
                                 dtype=dtype_input,
                                 name="data_input")
    res = _data_format_dim_map_compute(data_input, y, src_format, dst_format,
                                       kernel_name)

    with tvm.target.cce():
        sch = topi.generic.auto_schedule(res)
    config = {
        "name": kernel_name,
        "print_ir": False,
        "tensor_list": (data_input, res),
        "bool_storage_as_1bit": False
    }
    te.lang.cce.cce_build_code(sch, config)
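As a plain-Python restatement of the semantics described in the docstring (a hypothetical helper, not the kernel's compute path): each element of x is an axis index into src_format, possibly negative in [-4, 4), and the result is the position of that same axis letter in dst_format.

def dim_map_reference(idx, src_format="NHWC", dst_format="NCHW"):
    # Map an axis index from src_format to dst_format; negative indices wrap.
    return dst_format.index(src_format[idx % 4])

print(dim_map_reference(1))   # 'H' is axis 1 in NHWC and axis 2 in NCHW -> 2
print(dim_map_reference(-1))  # 'C' is the last axis in NHWC and axis 1 in NCHW -> 1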
Example #6
def tensor_equal(input_x, input_y, output_z, kernel_name="tensor_equal"):
    '''
    True if two tensors have the same size and elements, False otherwise

    :param input_x: dict
                input tensor x
    :param input_y: dict
                input tensor y
    :param output_z: dict
                output tensor z
    :param kernel_name: str
                  kernel name, default value is "tensor_equal"
    :return: none
    '''

    shape_x = input_x.get("shape")
    dtype_x = input_x.get("dtype")
    shape_y = input_y.get("shape")
    dtype_y = input_y.get("dtype")

    check_shape(shape_x)
    check_shape(shape_y)

    check_list = ("float16", "float32", "int32", "int8", "uint8")
    check_dtype(dtype_x, check_list)
    check_dtype(dtype_y, check_list)

    shape_x = list(shape_x)
    shape_x, _ = refine_shape_axes(shape_x, [])
    data_input_x = tvm.placeholder(shape_x, name="data_input_x", dtype=dtype_x)
    shape_y, _ = refine_shape_axes(shape_y, [])
    data_input_y = tvm.placeholder(shape_y, name="data_input_y", dtype=dtype_y)

    # use vsub method compute equal result
    res = tensor_equal_compute_use_sub(data_input_x, data_input_y, output_z,
                                       kernel_name)

    with tvm.target.cce():
        schedule = generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "tensor_list": [data_input_x, data_input_y, res],
        "bool_storage_as_1bit": False
    }

    te.lang.cce.cce_build_code(schedule, config)
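For reference, the intended result ("True if two tensors have the same size and elements") matches a plain NumPy check. This sketch only restates the semantics and has nothing to do with the vsub-based device compute.

import numpy as np

def tensor_equal_reference(x, y):
    # Same shape and identical elements, as described in the docstring.
    return x.shape == y.shape and bool(np.all(x == y))

print(tensor_equal_reference(np.arange(4), np.arange(4)))      # True
print(tensor_equal_reference(np.arange(4), np.arange(4) + 1))  # False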
Example #7
def assign_sub(var, value, out, kernel_name='assign_sub'):
    """
    Update var by subtracting value from it.

    Parameters:
    ----------
    var : dict
        dict of input_var, include shape and dtype,
        dtype support int8, uint8, int32, float16, float32

    value : dict
        dict of input_value, include shape and dtype,
        dtype support int8, uint8, int32, float16, float32.
        Must have the same shape and dtype as input_var

    out : dict
        dict of out

    kernel_name : str
        cce kernel name, default value is "assign_sub"

    Returns
    -------
    None
    """

    # get the shape and dtype
    shape_var = var.get("shape")
    shape_value = value.get("shape")
    dtype_var = var.get("dtype")
    dtype_value = value.get("dtype")

    # kernel name check: should be unique

    # check whether the shape is right
    check_shape(shape_var, param_name="var")
    check_shape(shape_value, param_name="value")
    if not operator.eq(shape_var, shape_value):
        raise RuntimeError("all input shape must be the equal")

    # check whether dtypes are fp16, fp32, int8, uint8, int32
    # and whether they are the same
    check_list = ("float16", "float32", "int8", "uint8", "int32")
    check_dtype(dtype_var, check_list, param_name="var")
    check_dtype(dtype_value, check_list, param_name="value")
    dtype_var = dtype_var.lower()
    dtype_value = dtype_value.lower()
    if dtype_var != dtype_value:
        raise RuntimeError("all input dtype must be same")

    shape, _ = refine_shape_axes(shape_var, [])
    data_var = tvm.placeholder(shape, dtype=dtype_var, name='data_var')
    data_value = tvm.placeholder(shape, dtype=dtype_value, name='data_value')
    sch, res = _assign_sub_compute(data_var, data_value, out, kernel_name)

    with set_bool_storage_config():
        tvm.build(sch, [data_var, data_value, res], "cce", name=kernel_name)
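The operator's effect ("update var by subtracting value from it") is an elementwise in-place subtraction. A minimal NumPy sketch of the semantics only:

import numpy as np

var = np.array([5.0, 3.0, 1.0], dtype=np.float32)
value = np.array([1.0, 1.0, 1.0], dtype=np.float32)
var -= value   # assign_sub semantics: var is updated to var - value
print(var)     # [4. 2. 0.]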
Example #8
def asinh(input_x, output_y, kernel_name="asinh"):
    """
    algorithm: asinh(x) = log(x + sqrt(x^2 + 1))

    Parameters
    ----------
    input_x: the dict of input_x, only support float16, float32

    output_y : the dict of output_y

    kernel_name : cce kernel name, default value is "asinh"

    Returns
    -------
    None

    """

    shape_input = input_x.get("shape")
    dtype_input = input_x.get("dtype")

    check_shape(shape_input, param_name="input_x")
    shape_input, _ = refine_shape_axes(shape_input, [])

    check_list = ("float16", "float32")
    check_dtype(dtype_input, check_list, param_name="input_x")

    inp_dtype = dtype_input.lower()
    shape_input = (functool_reduce(lambda x, y: x * y, shape_input), )
    data_input = tvm.placeholder(shape_input,
                                 dtype=inp_dtype,
                                 name="data_input")

    with tvm.target.cce():
        if tbe_platform.cce_conf.api_check_support("te.lang.cce.vlog",
                                                   "float32") or not \
                tbe_platform.cce_conf.api_check_support("te.lang.cce.vrec",
                                                        "float32"):
            res = asinh_compute_cloud(data_input, output_y, kernel_name)
        else:
            res = asinh_compute_mini(data_input, output_y, kernel_name)
        sch = generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "tensor_list": [data_input, res],
        "bool_storage_as_1bit": False
    }
    te.lang.cce.cce_build_code(sch, config)
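The docstring's formula asinh(x) = log(x + sqrt(x^2 + 1)) can be checked against NumPy's built-in arcsinh; a host-side reference sketch only.

import numpy as np

x = np.linspace(-2.0, 2.0, 5, dtype=np.float32)
by_formula = np.log(x + np.sqrt(np.square(x) + 1.0))
print(np.allclose(by_formula, np.arcsinh(x)))  # True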
Example #9
def asin(x, y, kernel_name="asin"):
    """
    ----------
    asin(x) = | arcsin(sqrt(1-x^2)) - HALF_PI, x belongs to (-1, -2^(-0.5))
              | the 15th order taylor expansion, x belongs to (-2^(-0.5), 2^(-0.5))
              | HALF_PI - arcsin(sqrt(1-x^2)), x belongs to (2^(-0.5), 1)

    Parameters:
    ----------
    x : the dict of data input, only support float16, float32

    y : the dict of output

    kernel_name : cce kernel name, default value is "asin"

    Returns
    -------
    None
    """
    shape_input = x.get("shape")
    dtype_input = x.get("dtype")

    check_shape(shape_input, param_name="x")
    shape_input, _ = refine_shape_axes(shape_input, [])

    check_list = ("float16", "float32")
    check_dtype(dtype_input, check_list, param_name="x")

    inp_dtype = dtype_input.lower()
    data_input = tvm.placeholder(shape_input,
                                 dtype=inp_dtype,
                                 name="data_input")

    res = asin_compute(data_input, y, kernel_name)

    with tvm.target.cce():
        auto_sch = topi.generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "print_ir": False,
        "tensor_list": [data_input, res],
        "bool_storage_as_1bit": False
    }

    te.lang.cce.cce_build_code(auto_sch, config)
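The outer branches of the piecewise form in the docstring are ordinary trigonometric identities and can be verified numerically; this reference sketch is unrelated to the kernel's Taylor-expansion path.

import numpy as np

half_pi = np.pi / 2.0
x_neg = np.array([-0.95, -0.8])  # x in (-1, -2^(-0.5))
x_pos = np.array([0.8, 0.95])    # x in (2^(-0.5), 1)

print(np.allclose(np.arcsin(np.sqrt(1 - x_neg ** 2)) - half_pi, np.arcsin(x_neg)))  # True
print(np.allclose(half_pi - np.arcsin(np.sqrt(1 - x_pos ** 2)), np.arcsin(x_pos)))  # True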
Example #10
def atanh(x, y, kernel_name="atanh"):
    """
    Algorithm: atanh(x) = 0.5 * log((1 + x) / (1 - x))

    Parameters
    ----------
    x: the dict of input data, only support float16, float32.

    y: the dict of output

    kernel_name: cce kernel name, default value is "atanh".

    Returns
    -------
    None
    """

    shape = x.get("shape")
    dtype = x.get("dtype")

    check_shape(shape, param_name="x")
    shape, _ = refine_shape_axes(shape, [])

    check_list = ("float16", "float32")
    check_dtype(dtype, check_list, param_name="x")

    dtype = dtype.lower()
    input_data = tvm.placeholder(shape, dtype, "input_data")

    with tvm.target.cce():
        res = atanh_compute(input_data, y, kernel_name)
        sch = generic.auto_schedule(res)

    config = {"name": kernel_name,
              "tensor_list": [input_data, res],
              "print_ir": False,
              "bool_storage_as_1bit": False
             }

    te.lang.cce.cce_build_code(sch, config)
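For reference, atanh has the closed form 0.5 * log((1 + x) / (1 - x)); a NumPy sanity-check sketch, not the device compute.

import numpy as np

x = np.array([-0.9, -0.5, 0.0, 0.5, 0.9], dtype=np.float32)
by_formula = 0.5 * np.log((1.0 + x) / (1.0 - x))
print(np.allclose(by_formula, np.arctanh(x)))  # True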
Example #11
def atan(x, y, kernel_name="atan"):
    """
    Algorithm: atan

    ----------------------------------
    Parameters:

    x: the dict of input data, only support float16, float32.

    y: the dict of output

    kernel_name: cce kernel name, default value is "atan".

    ----------------------------------
    Returns:

        None

    """
    shape = x.get("shape")
    dtype = x.get("dtype")

    check_shape(shape, param_name="x")
    shape, _ = refine_shape_axes(shape, [])

    check_list = ("float16", "float32")
    check_dtype(dtype, check_list, param_name="x")

    with tvm.target.cce():
        dtype = dtype.lower()
        input_data = tvm.placeholder(shape, dtype=dtype, name="input_data")
        res = atan_compute(input_data, y, kernel_name)
        res = te.lang.cce.cast_to(res, dtype)
        auto_sch = topi.generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "print_ir": False,
        "tensor_list": (input_data, res)
    }

    te.lang.cce.cce_build_code(auto_sch, config)
Example #12
def acosh(input_data, output_res, kernel_name="acosh"):
    """
    calculating data's acosh, y = log(x + sqrt(x^2 - 1))

    Parameters
    ----------
    input_data: the dict of input, only support float16, float32

    output_res : the dict of output

    kernel_name : cce kernel name, default value is "acosh"

    Returns
    -------
    None

    """

    shape_input = input_data.get("shape")
    dtype_input = input_data.get("dtype")
    check_shape(shape_input, param_name="input_data")

    check_list = ("float16", "float32")
    check_dtype(dtype_input, check_list, param_name="input_data")
    shape_input, _ = refine_shape_axes(shape_input, [])

    input_dtype = dtype_input.lower()
    data = tvm.placeholder(shape_input, dtype=input_dtype, name="data_input")

    res = acosh_compute(data, output_res, kernel_name)

    with tvm.target.cce():
        sch = topi.generic.auto_schedule(res)

    config = {"name": kernel_name,
              "print_ir": False,
              "tensor_list": (data, res),
              "bool_storage_as_1bit": False}

    te.lang.cce.cce_build_code(sch, config)
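The docstring's formula y = log(x + sqrt(x^2 - 1)), defined for x >= 1, can be checked against NumPy's arccosh; a reference sketch only.

import numpy as np

x = np.array([1.0, 1.5, 3.0, 10.0])
by_formula = np.log(x + np.sqrt(np.square(x) - 1.0))
print(np.allclose(by_formula, np.arccosh(x)))  # True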
Example #13
def bessel_i1e(x, y, kernel_name="bessel_i1e"):
    """
    Algorithm: calculating data's bessel_i1e

    Parameters
    ----------
    x: the dict of input, only support float16, float32

    y : the dict of output

    kernel_name : cce kernel name, default value is "bessel_i1e"

    Returns
    -------
    None
    """

    shape_input = x.get("shape")
    dtype_input = x.get("dtype")

    check_shape(shape_input, param_name="x")
    shape_input, _ = refine_shape_axes(shape_input, [])

    check_list = ("float16", "float32")
    check_dtype(dtype_input, check_list, param_name="x")

    input_dtype = dtype_input.lower()
    data = tvm.placeholder(shape_input, dtype=input_dtype, name="data_input")

    res = bessel_i1e_compute(data, y, kernel_name)

    with tvm.target.cce():
        sch = topi.generic.auto_schedule(res)

    config = {"name": kernel_name,
              "print_ir": False,
              "tensor_list": (data, res),
              "bool_storage_as_1bit": False}
    te.lang.cce.cce_build_code(sch, config)
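bessel_i1e computes the exponentially scaled modified Bessel function of the first kind of order 1, i1e(x) = exp(-|x|) * I1(x). If SciPy is available, scipy.special.i1e gives host-side reference values; this sketch is not part of the kernel.

import numpy as np
from scipy.special import i1e  # exponentially scaled modified Bessel, order 1

x = np.array([-2.0, -0.5, 0.0, 0.5, 2.0], dtype=np.float32)
print(i1e(x))  # host-side reference values for comparison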
Example #14
def mse_loss(predict, label, y, reduction='mean', kernel_name="mse_loss"):
    '''
    calculating data
    sum = (predict_n - label_n)^2
    if reduction == sum: res = sum, output a scalar
    if reduction == mean: res = sum / total number of elements, output a scalar
    if reduction == none: res = (predict_n - label_n)^2, output a tensor

    :param predict: dict
                    shape and dtype of tensor predict
    :param label: dict
                    shape and dtype of tensor real label,
                    should be same shape and dtype as predict
    :param y: dict
              shape and dtype of output, loss result after compute
    :param reduction: str
                      Specifies the reduction to apply to the output:'none' | 'mean' | 'sum'
                      Default: 'mean'
                      'none': no reduction will be applied,
                      'mean': the sum of the output will be divided by the number
                            of elements in the output
                      'sum': the output will be summed. Note: size_average and reduce
                           are in the process of being deprecated
                           and in the meantime, specifying either of those
                           two args will override reduction.
    :param kernel_name: str
                      kernel name, default value is "mse_loss"
    :return: none
    '''

    predict_shape = predict.get("shape")
    predict_dtype = predict.get("dtype")
    predict_dtype_lower = predict_dtype.lower()

    label_shape = label.get("shape")
    label_dtype = label.get("dtype")
    label_dtype_lower = label_dtype.lower()

    # check dtype
    dtype_list = ("float16", "float32")
    op_utils.check_dtype(predict_dtype, dtype_list)
    op_utils.check_dtype(label_dtype, dtype_list)

    # check shape
    op_utils.check_shape(predict_shape)
    op_utils.check_shape(label_shape)

    # check kernel_name
    util.check_kernel_name(kernel_name)

    predict_size, _ = op_utils.refine_shape_axes(predict_shape, [])
    data_predict = tvm.placeholder(predict_size, dtype=predict_dtype_lower, name="data_predict")

    label_size, _ = op_utils.refine_shape_axes(label_shape, [])
    data_label = tvm.placeholder(label_size, dtype=label_dtype_lower, name="data_label")

    if predict_size != label_size:
        raise RuntimeError("predict tensor size don't match label tensor")
    if reduction not in ("mean", "sum", "none"):
        raise RuntimeError("reduction type should in mean/sum/none")

    res = mse_loss_compute(data_predict, data_label, reduction, kernel_name)

    with tvm.target.cce():
        schedule = generic.auto_schedule(res)

    config = {"print_ir": False,
              "name": kernel_name,
              "tensor_list": [data_predict, data_label, res]}
    te.lang.cce.cce_build_code(schedule, config)
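The three reduction modes described in the docstring have a direct NumPy restatement; a reference sketch of the semantics only, not the device compute.

import numpy as np

def mse_loss_reference(predict, label, reduction="mean"):
    # 'none': elementwise squared error; 'sum': its sum; 'mean': its mean.
    diff_sq = np.square(predict - label)
    if reduction == "none":
        return diff_sq
    if reduction == "sum":
        return diff_sq.sum()
    return diff_sq.mean()

p = np.array([1.0, 2.0, 3.0], dtype=np.float32)
lab = np.array([1.0, 0.0, 0.0], dtype=np.float32)
print(mse_loss_reference(p, lab, "none"))  # [0. 4. 9.]
print(mse_loss_reference(p, lab, "sum"))   # 13.0
print(mse_loss_reference(p, lab, "mean"))  # ~4.333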
Example #15
def approximate_equal(input_x,
                      input_y,
                      output_z,
                      tolerance=1e-5,
                      kernel_name="approximate_equal"):
    """
    abs(x-y) <= tolerance
    Parameters
    ----------
    input_x : dict, include shape and dtype, support fp16 and fp32
        shape of tensors, assume src_shape equals dst_shape

    input_y : dict, include shape and dtype, support fp16 and fp32
        shape of tensors, assume src_shape equals dst_shape

    output_z : dict, include shape and dtype, reserved

    tolerance: default 1e-5

    kernel_name : str
        cce kernel name, default value is "approximate_equal"

    Returns
    ------
    None
    """

    shape_x = input_x.get("shape")
    shape_y = input_y.get("shape")
    in_dtype = input_x.get("dtype")
    in_y_dtype = input_y.get("dtype")

    if tolerance < 0:
        raise RuntimeError("tolerance should >= 0")

    # check shape
    if not operator.eq(shape_x, shape_y):
        raise RuntimeError("all input shape must same")
    check_shape(shape_x, param_name="input_x")
    shape_x, _ = refine_shape_axes(shape_x, [])
    shape_y, _ = refine_shape_axes(shape_y, [])

    # check input tensor data_type
    check_list = ("float16", "float32")
    check_dtype(in_dtype, check_list, param_name="input_x")
    check_dtype(in_y_dtype, check_list, param_name="input_y")
    in_dtype = input_x.get("dtype").lower()
    in_y_dtype = input_y.get("dtype").lower()
    if not operator.eq(in_dtype, in_y_dtype):
        raise RuntimeError("all input type must same.")

    in_data_x = tvm.placeholder(shape_x, name="shape_x", dtype=in_dtype)
    in_data_y = tvm.placeholder(shape_y, name="shape_y", dtype=in_dtype)
    res = approximate_equal_compute(in_data_x, in_data_y, output_z, tolerance,
                                    kernel_name)

    with tvm.target.cce():
        auto_sch = topi.generic.auto_schedule(res)

    config = {
        "name": kernel_name,
        "tensor_list": [in_data_x, in_data_y, res],
        "bool_storage_as_1bit": False
    }
    te.lang.cce.cce_build_code(auto_sch, config)
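The comparison in the docstring, abs(x - y) <= tolerance, maps onto an elementwise NumPy check; a reference sketch of the semantics only.

import numpy as np

def approximate_equal_reference(x, y, tolerance=1e-5):
    # Elementwise abs(x - y) <= tolerance, as described in the docstring.
    return np.abs(x - y) <= tolerance

x = np.array([1.0, 2.0], dtype=np.float32)
y = np.array([1.0 + 1e-6, 2.1], dtype=np.float32)
print(approximate_equal_reference(x, y))  # [ True False]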