Example #1
def conv_backprop_filter_run(fmap_shape, filter_shape, pad_, stride_, dilation_, attrs=None):
    block_size = 16
    conv_dtype = 'float16'

    in_n, in_c, in_h, in_w = fmap_shape
    cout, cin, w_h, w_w = filter_shape

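    # NC1HWC0 packs channels into blocks of block_size elements, so round both
    # channel counts up to the nearest multiple of block_size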
    in_c = (in_c + block_size - 1) // block_size * block_size
    cout = (cout + block_size - 1) // block_size * block_size

    pad_top, pad_bottom, pad_left, pad_right = pad_
    stride_h, stride_w = stride_

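    # standard convolution output size: out = (in + pad_before + pad_after - kernel) // stride + 1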
    out_n = in_n
    out_c = cout
    out_h = (in_h + pad_top + pad_bottom - w_h) // stride_h + 1
    out_w = (in_w + pad_left + pad_right - w_w) // stride_w + 1

    x_shape = (in_n, in_c, in_h, in_w)
    w_shape = (cout, in_c, w_h, w_w)
    y_shape = (out_n, out_c, out_h, out_w)
    inN, inC, inH, inW = x_shape
    input_shape_nc1hwc0 = (inN, inC // block_size, inH, inW, block_size)
    o_n, o_c, o_h, o_w = y_shape
    y_shape_nc1hwc0 = (o_n, o_c // block_size, o_h, o_w, block_size)

    input_shape = [y_shape_nc1hwc0, input_shape_nc1hwc0]

    input_file = os.environ.get("RANDOM_DATA_DISK_PATH", "")
    expect_file = input_file + "/" + gen_kernel_name([input_shape], [conv_dtype],
                              op_attrs=[fmap_shape, filter_shape, pad_, stride_, dilation_],
                              kernel_name='conv_backprop_filter', attrs=attrs) + ".bin"

    print("gen_data begin.")
    dy_data, dx_data, expect = gen_data(x_shape, w_shape, pad_, stride_, dilation_, expect_file, attrs=attrs)
    assert dy_data.shape == y_shape_nc1hwc0
    print("gen_data finished.")

    out_data = np.full(expect.shape, 0, 'float32')
    inputs = (dy_data, dx_data)

    flag_w = os.environ.get("WRITE_TO_DISK", "No")
    if flag_w == "Yes":
        return inputs, out_data, expect, True

    mod = utils.op_build_test(conv_backprop_filter, [input_shape], [conv_dtype],
                              op_attrs=[fmap_shape, filter_shape, pad_, stride_, dilation_],
                              kernel_name='conv_backprop_filter', attrs=attrs)
    args = (dy_data, dx_data, out_data)
    out_data = utils.mod_launch(mod, args, expect=expect)
    rtol, atol = get_rtol_atol("conv_backprop_filter", conv_dtype)
    return inputs, out_data, expect, compare_tensor(out_data, expect, rtol=rtol, atol=atol, equal_nan=True)
Example #2
def rint_run(shape, dtype, attrs=None):
    """rint_run"""
    if attrs is None:
        attrs = {}

    mod = utils.op_build_test(rint.rint, [shape], [dtype], kernel_name='rint', attrs=attrs)
    args, exp_output, input_x = gen_data(shape, dtype)
    acu_output = utils.mod_launch(mod, args, expect=exp_output)
    # compare result
    rtol, atol = get_rtol_atol("rint", dtype)
    testcase_result = compare_tensor(acu_output, exp_output, rtol=rtol, atol=atol, equal_nan=True)

    return input_x, acu_output, exp_output, testcase_result
Example #3
def laplacian_of_gaussian_ad_run(shape, dtype, attrs):
    expect, head_np, input_np = gen_data(dtype, shape)
    mod = utils.op_build_test(laplacian_of_gaussian_ad, [head_np.shape, shape],
                              [dtype, dtype],
                              kernel_name='laplacian_of_gaussian_ad',
                              attrs=attrs)
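    # pre-fill the output with NaN so elements the kernel never writes are easy to spot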
    output = np.full(expect.shape, np.nan, dtype)
    output = utils.mod_launch(mod, (head_np, input_np, output), expect=expect)
    rtol, atol = get_rtol_atol("laplacian_of_gaussian", dtype)
    return (head_np, input_np), output, expect, compare_tensor(output,
                                                               expect,
                                                               rtol=rtol,
                                                               atol=atol)
Example #4
def bessel_i1e_run(x_shape, x_dtype, attrs):
    shapes = [x_shape]
    dtypes = [x_dtype]
    mod = utils.op_build_test(bessel_i1e,
                              shapes,
                              dtypes,
                              kernel_name="bessel_i1e",
                              attrs=attrs)
    bench_mark, inputs, output = gen_data(dtypes, shapes)
    output = utils.mod_launch(mod, inputs + [output], expect=bench_mark)
    rtol, atol = get_rtol_atol("bessel_i1e", dtypes[0])
    compare_res = compare_tensor(output, bench_mark, rtol=rtol, atol=atol)
    return inputs, output, bench_mark, compare_res
Example #5
def conv_filter_ad_run(fmap_shape, filter_shape, pad_, stride_, dilation_, attrs=None):
    block_size = 16
    conv_dtype = 'float16'

    in_n, in_c, in_h, in_w = fmap_shape
    cout, cin, w_h, w_w = filter_shape
    assert in_c == cin

    in_c = (in_c + block_size - 1) // block_size * block_size
    cout = (cout + block_size - 1) // block_size * block_size

    pad_top, pad_bottom, pad_left, pad_right = pad_
    stride_h, stride_w = stride_

    out_n = in_n
    out_c = cout
    out_h = (in_h + pad_top + pad_bottom - w_h) // stride_h + 1
    out_w = (in_w + pad_left + pad_right - w_w) // stride_w + 1

    x_shape = (in_n, in_c, in_h, in_w)
    w_shape = (cout, in_c, w_h, w_w)
    x_5D_shape = (in_n, in_c // block_size, in_h, in_w, block_size)
    y_5D_shape = (out_n, out_c // block_size, out_h, out_w, block_size)

    forward_input_output_shapes = [y_5D_shape, x_5D_shape]
    dw_input_shapes = [y_5D_shape, x_5D_shape]

    input_file = os.environ.get("RANDOM_DATA_DISK_PATH", "")
    expect_file = input_file + "/" + gen_kernel_name([dw_input_shapes], [conv_dtype],
                                                     op_attrs=[fmap_shape, filter_shape, pad_, stride_, dilation_],
                                                     kernel_name='conv_filter_ad', attrs=attrs) + ".bin"
    print("gen_data begin.")
    dy_data, dx_data, expect = gen_data_dw(x_shape, w_shape, pad_, stride_, dilation_, expect_file, attrs=attrs)
    print("gen_data finished.")

    out_data = np.full(expect.shape, 0, 'float32')
    np_input = (dy_data, dx_data)

    flag_w = os.environ.get("WRITE_TO_DISK", "No")
    if flag_w == "Yes":
        return np_input, out_data, expect, True

    mod = utils.op_build_test(ConvFilterAd, [dw_input_shapes], [conv_dtype],
                              op_attrs=[fmap_shape, filter_shape, pad_, stride_, dilation_],
                              kernel_name='conv_filter_ad', attrs=attrs, dump_code=True)
    args = (dy_data, dx_data, out_data)
    out_data = utils.mod_launch(mod, args, expect=expect)
    rtol, atol = get_rtol_atol("conv_filter_ad", conv_dtype)
    assert_res = compare_tensor(out_data, expect, rtol=rtol, atol=atol, equal_nan=True)

    return np_input, out_data, expect, assert_res
Example #6
def batchmatmul_execute(bs, m, n, k, bias_shape, dtype, trans_a, trans_b,
                        kernel_name, attrs):
    # Generate data
    _, _, out_shape = get_shape(bs, m, n, k, trans_a, trans_b)

    # this part is for auto-tuning
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = batchmatmul_compile(bs, m, n, k, bias_shape, dtype, trans_a,
                                  trans_b, kernel_name, attrs, t)
        if t:
            m_a, m_b, matrix_bias, expect = gen_data(bs, m, n, k, bias_shape,
                                                     trans_a, trans_b, dtype)
            output = np.full(out_shape, np.nan, dtype=dtype)
            if len(bias_shape) > 0:
                return mod, expect, (m_a, m_b, matrix_bias, output)
            return mod, expect, (m_a, m_b, output)
        else:
            return mod

    mod, args = batchmatmul_compile(bs, m, n, k, bias_shape, dtype, trans_a,
                                    trans_b, kernel_name, attrs)
    m_a, m_b, matrix_bias, expect = gen_data(bs, m, n, k, bias_shape, trans_a,
                                             trans_b, dtype)
    output = np.full(out_shape, np.nan, dtype=dtype)
    if len(bias_shape) > 0:
        launch_args = [m_a, m_b, matrix_bias, output]
        outputs = [3]
    else:
        launch_args = [m_a, m_b, output]
        outputs = [2]
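    # For dynamic-shape kernels, append the runtime shape arguments and the
    # computed block_dim to the launch arguments.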
    if attrs.get("dynamic"):
        launch_args = launch_args + args
        block_dim = compute_blockdim(bs, (m, n))
        launch_args.append(block_dim)
    output = utils.mod_launch(mod, launch_args, outputs=outputs, expect=expect)
    rtol, atol = get_rtol_atol("batchmatmul", dtype)
    print("-----------------")
    print(output.dtype)
    print(expect.dtype)
    res = compare_tensor(output, expect, rtol=rtol, atol=atol, equal_nan=True)
    # if not res:
    #     res = utils.double_fivethou_compare(output, expect, r_tol=5e-3, equal_nan=True)
    return (m_a, m_b), output, expect, res
Example #7
def matmul_execute(shape_x, shape_y, bias, left_format, right_format,
                   out_format, adj_x, adj_y, dtype, out_dtype, kernel_name,
                   attrs):
    '''
    There are four types of fractal format in the Davinci core: zZ, zN, nZ, nN.
    General matmul format combinations:
    left_trans: False, right_trans: False: zZ * nZ = zN
    left_trans: True,  right_trans: False: nN * nZ = zN
    left_trans: False, right_trans: True : zZ * zN = zN
    left_trans: True,  right_trans: True : nN * zN = zN

    Now we need to support: zN * nZ = zN.
    Use left_format to specify the left matrix data format and right_format
    to specify the right matrix data format.
    '''
    batch_tuple, m, k, n = extract_dim(shape_x, shape_y, adj_x, adj_y)
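    # align m, k, n up to multiples of 16 to match the 16x16 fractal block size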
    m = (m + 15) // 16 * 16
    n = (n + 15) // 16 * 16
    k = (k + 15) // 16 * 16
    shape_xx, shape_yy, bias_shape, out_shape, k = get_converted_shapes(
        m, n, k, batch_tuple, adj_x, adj_y, bias, left_format, right_format,
        out_format)
    mod = dynamic_matmul_compile(shape_x, shape_y, bias, left_format,
                                 right_format, out_format, adj_x, adj_y, dtype,
                                 out_dtype, kernel_name, attrs)
    # Generate data
    m_x, m_y, bench_mark, bias_data = matmul_data(batch_tuple, m, k, n, dtype,
                                                  out_dtype, bias, adj_x,
                                                  adj_y, left_format,
                                                  right_format, out_format)

    # mod launch
    output = np.full(out_shape, np.nan, out_dtype)
    if bias == 0:
        output = utils.mod_launch(
            mod, (m_x, m_y, output, 1, 1, 1, 1, 1, 1, 1, 1, 1),
            outputs=(2, ),
            expect=bench_mark)
    elif bias == 1:
        output = utils.mod_launch(mod, (m_x, m_y, bias_data, output),
                                  expect=bench_mark)

    # compare result
    rtol, atol = get_rtol_atol("matmul", dtype)
    compare_result = compare_tensor(output,
                                    bench_mark,
                                    rtol=rtol,
                                    atol=atol,
                                    equal_nan=True)
    # compare_result = utils.result_compare(output, bench_mark, r_tol=5e-3)
    return (m_x, m_y), output, bench_mark, compare_result
Example #8
def acosh_grad_run(shape, dtype, attrs):
    """run function for dsl function acosh_grad."""
    shapes = [shape, shape]
    dtypes = [dtype, dtype]
    mod = utils.op_build_test(acosh_grad,
                              shapes,
                              dtypes,
                              kernel_name="acosh_grad",
                              attrs=attrs)
    bench_mark, inputs, output = gen_data(dtype, shape)
    output = utils.mod_launch(mod, inputs + [output], expect=bench_mark)
    rtol, atol = get_rtol_atol("acosh_grad", dtype)
    compare_res = compare_tensor(output, bench_mark, rtol=rtol, atol=atol)
    return inputs, output, bench_mark, compare_res
Example #9
def sinh_run(shape, dtype, attrs=None):
    mod = utils.op_build_test(Sinh, [shape], [dtype],
                              kernel_name="sinh",
                              attrs=attrs)
    expect, inputs, output = gen_data(dtype, shape)
    output = utils.mod_launch(mod, (inputs, output), expect=expect)
    rtol, atol = get_rtol_atol("sinh", dtype)
    TestCase_Result = compare_tensor(output,
                                     expect,
                                     rtol=rtol,
                                     atol=atol,
                                     equal_nan=False)

    return inputs, output, expect, TestCase_Result
Example #10
def div_no_nan_execute(shapes, dtype, attrs):
    exp_output, inputs, args = gen_data(dtype, shapes)
    mod = div_no_nan_compile(shapes, dtype, attrs)
    # result_tvm
    acu_output = utils.mod_launch(mod, args, expect=exp_output)
    # compare result
    rtol, atol = get_rtol_atol("div_no_nan", dtype)
    TestCase_Result = compare_tensor(acu_output,
                                     exp_output,
                                     rtol=rtol,
                                     atol=atol,
                                     equal_nan=True)

    return inputs, acu_output, exp_output, TestCase_Result
Example #11
def truncate_div_run(shape1, dtype1, shape2, dtype2, attrs):
    """run function for truncate_div"""
    expect, inputs, output = gen_data(dtype1, dtype2, shape1, shape2)
    mod = utils.op_build_test(truncate_div.truncate_div, [shape1, shape2],
                              [dtype1, dtype2],
                              kernel_name="truncate_div",
                              attrs=attrs)
    output = utils.mod_launch(mod, (*inputs, output), expect=expect)
    rtol, atol = get_rtol_atol("truncate_div", dtype1)
    return inputs, output, expect, compare_tensor(output,
                                                  expect,
                                                  rtol=rtol,
                                                  atol=atol,
                                                  equal_nan=True)
Example #12
def clear_zero_run(shape, dtype, attrs, kernel_name="clear_zero"):
    expect = np.full(shape, 0, dtype)
    data = np.full(shape, np.nan, dtype)
    inout = data

    mod = utils.op_build_test(ClearZero, [shape], [dtype],
                              kernel_name=kernel_name, attrs=attrs)
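    # The tensor is updated in place: outputs=(-1,) tells mod_launch that the
    # last launch argument is also the output buffer to return.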
    inout = utils.mod_launch(mod, (inout, ), outputs=(-1, ), expect=expect)
    rtol, atol = get_rtol_atol("clear_zero", dtype)
    return data, inout, expect, compare_tensor(inout,
                                               expect,
                                               rtol=rtol,
                                               atol=atol,
                                               equal_nan=True)
Example #13
def asinh_run(x_shape, x_dtype, attrs):
    """run function for dsl function asinh."""
    shapes = [x_shape]
    dtypes = [x_dtype]
    mod = utils.op_build_test(asinh,
                              shapes,
                              dtypes,
                              kernel_name="asinh",
                              attrs=attrs)
    bench_mark, input_datas, output = gen_data(x_dtype, x_shape)
    output = utils.mod_launch(mod, input_datas + [output], expect=bench_mark)
    rtol, atol = get_rtol_atol("asinh", x_dtype)
    compare_res = compare_tensor(output, bench_mark, rtol=rtol, atol=atol)
    return input_datas, output, bench_mark, compare_res
Example #14
def fused_bn_grad_5D_run_1(shape, dtype, kernel_name, attrs):
    """ test bnGrad_1 """
    def get_expect(dy, data, mean):
        if dy.dtype == "float16":
            dy = dy.astype("float32")
            data = data.astype("float32")
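        # stage 1 of the fused BN gradient: per-channel reductions over the spatial axes (2, 3)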
        data_minus_mean = data - mean
        dgamma_red_hw = np.sum(dy * data_minus_mean, axis=(2,3), keepdims=True)
        dbeta_red_hw = np.sum(dy, axis=(2,3), keepdims=True)
        return [dgamma_red_hw, dbeta_red_hw, data_minus_mean]

    shape_nc1c0 = (shape[0], shape[1], 1, 1, shape[4])
    shape_c1c0 = (1, shape[1], 1, 1, shape[4])

    bng1_shapes = [shape, shape, shape_c1c0]
    bng1_dtypes = [dtype, dtype, "float32"]
    
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(fused_bn_grad1,
                            bng1_shapes, bng1_dtypes,
                            kernel_name=kernel_name + "_step1", attrs=attrs, tuning=t)     
        if t:
            inputs = [np.random.rand(*s).astype(t) for (s, t) in zip(bng1_shapes, bng1_dtypes)]
            inputs[2] = np.mean(inputs[1], axis=(0, 2, 3), keepdims=True).astype(bng1_dtypes[2])
            out_shapes = [shape_nc1c0, shape_nc1c0, shape]
            outputs = [np.full(s, np.nan, "float32") for s in out_shapes] 
            expects = get_expect(*inputs)
            return mod, expects, {"args": (*inputs, *outputs), 'outputs': tuple(range(-len(outputs), 0)),
                                 'tuning': False}
        else:
            return mod        
   
    mod = utils.op_build_test(fused_bn_grad1,
                        bng1_shapes, bng1_dtypes,
                        kernel_name=kernel_name + "_step1", attrs=attrs)
    # np.random.seed(0)
    inputs = [np.random.rand(*s).astype(t) for (s, t) in zip(bng1_shapes, bng1_dtypes)]
    inputs[2] = np.mean(inputs[1], axis=(0, 2, 3), keepdims=True).astype(bng1_dtypes[2])
    out_shapes = [shape_nc1c0, shape_nc1c0, shape]
    outputs = [np.full(s, np.nan, "float32") for s in out_shapes]
    expects = get_expect(*inputs)
    outputs = list(utils.mod_launch(mod, (*inputs, *outputs), outputs=tuple(range(-len(outputs), 0)),
                                    expect=expects))
    rtol, atol = get_rtol_atol("fused_batch_norm_grad", dtype)
    results = list(map(lambda x, y: np.allclose(x, y, rtol=rtol, atol=atol), outputs, expects))
    print("results", results)
    return inputs, outputs, expects, all(results)
Example #15
def reduction_layer_execute(shape, dtype, axis, op, coeff, attrs):
    exp_output, inputs, args = gen_data(shape, dtype, axis, op, coeff)
    mod = reduction_layer_compile(shape, dtype, axis, op, coeff, attrs)
    # result_tvm
    acu_output = utils.mod_launch(mod, args, expect=exp_output)

    # compare result
    rtol, atol = get_rtol_atol("reduction_layer", dtype)
    TestCase_Result = compare_tensor(acu_output,
                                     exp_output,
                                     rtol=rtol,
                                     atol=atol,
                                     equal_nan=True)

    return inputs, acu_output, exp_output, TestCase_Result
Example #16
def bitwise_and_run(shape1, dtype1, shape2, dtype2, kernel_name, attrs):
    mod = utils.op_build_test(bitwise_and.bitwise_and, [shape1, shape2],
                              [dtype1, dtype2],
                              kernel_name=kernel_name,
                              attrs=attrs)
    expect, inputs, output = gen_data(shape1, shape2, dtype1, dtype2)
    actual = utils.mod_launch(mod, (*inputs, output), expect=expect)

    rtol, atol = get_rtol_atol("bitwise_and", dtype1)
    testcase_result = compare_tensor(actual,
                                     expect,
                                     rtol=rtol,
                                     atol=atol,
                                     equal_nan=True)
    return inputs, actual, expect, testcase_result
Example #17
def leaky_relu_execute(shape, dtype, negative_slop, attrs):
    exp_output, inputs, args = gen_data(dtype, shape, negative_slop)
    mod = leaky_relu_compile(shape, dtype, negative_slop, attrs)
    # result_tvm
    acu_output = utils.mod_launch(mod, args, expect=exp_output)

    # compare result
    rtol, atol = get_rtol_atol("leaky_relu", dtype)
    TestCase_Result = compare_tensor(acu_output,
                                     exp_output,
                                     rtol=rtol,
                                     atol=atol,
                                     equal_nan=True)

    return inputs, acu_output, exp_output, TestCase_Result
Example #18
def abs_run(shape, dtype, attrs=None):
    # Result_Numpy
    if attrs is None:
        attrs = {}
    input_shape = [shape]
    input_dtype = [dtype]

    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(Abs,
                                  input_shape,
                                  input_dtype,
                                  kernel_name=kernel_name,
                                  attrs=attrs,
                                  tuning=t)
        if t:
            exp_output, inputs, output = gen_date(dtype, shape)
            return mod, exp_output, (inputs, output)
        else:
            return mod
    else:
        mod = utils.op_build_test(Abs,
                                  input_shape,
                                  input_dtype,
                                  kernel_name='abs',
                                  attrs=attrs)
        exp_output, inputs, output = gen_date(dtype, shape)
        acu_output = utils.mod_launch(mod, (inputs, output), expect=exp_output)

        # compare result
        rtol, atol = get_rtol_atol("abs", dtype)
        TestCase_Result = compare_tensor(acu_output,
                                         exp_output,
                                         rtol=rtol,
                                         atol=atol,
                                         equal_nan=True)

        if attrs.get("profiling", False):
            target_name = attrs["target"].split()[0]
            data, output = to_tvm_nd_array([inputs, output],
                                           akg.tvm.context(target_name, 0))
            target_profiling(mod,
                             data,
                             output,
                             target=target_name,
                             repeat_time=attrs["repeat_times"])

        return inputs, acu_output, exp_output, TestCase_Result
Example #19
def fused_bn_grad_5D_run_2(shape, dtype, eps, kernel_name, attrs):
    """ test bnGrad_2 """
    def get_expect(dgamma_red_hw, dbeta_red_hw, var, gamma, eps, data_shape):
        m = data_shape[0] * data_shape[2] * data_shape[3]
        neg_m_rec = -1.0 / m
        eps = np.array([eps], dtype=var.dtype).reshape([1] * 5)
        neg_m_rec = np.array([neg_m_rec], dtype=var.dtype).reshape([1] * 5)
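        # s = 1 / sqrt(var + eps): the per-channel normalization scale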
        s = (1.0 / np.sqrt(var + eps)).astype(var.dtype)
        dgamma = s * np.sum(dgamma_red_hw, axis=0, keepdims=True)
        dbeta = np.sum(dbeta_red_hw, axis=0, keepdims=True)
        rs = gamma * s
        dgamma_dx = neg_m_rec * rs * s * dgamma
        dbeta_dx = neg_m_rec * rs * dbeta
        return [dgamma, dbeta, rs, dgamma_dx, dbeta_dx]

    shape_nc1c0 = (shape[0], shape[1], 1, 1, shape[4])
    shape_c1c0 = (1, shape[1], 1, 1, shape[4])
    bng2_shapes = [shape_nc1c0, shape_nc1c0, shape_c1c0, shape_c1c0]
    bng2_dtypes = ["float32"] * len(bng2_shapes)
    bng2_opattrs = [eps, shape]
    # np.random.seed(0)
    inputs = [np.random.rand(*s).astype(t) for (s, t) in zip(bng2_shapes, bng2_dtypes)]
    out_shapes = [shape_c1c0, shape_c1c0, shape_c1c0, shape_c1c0, shape_c1c0]
    
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(fused_bn_grad2,
                        bng2_shapes, bng2_dtypes, bng2_opattrs,
                        kernel_name=kernel_name + "_step2", attrs=attrs, tuning=t)    
        if t:
            outputs = [np.full(s, np.nan, "float32") for s in out_shapes]
            expects = get_expect(*inputs, *bng2_opattrs)
            return mod, expects, {"args": (*inputs, *outputs), 'outputs': tuple(range(-len(outputs), 0)),
                                 'tuning': False}
        else:
            return mod     
    mod = utils.op_build_test(fused_bn_grad2,
                    bng2_shapes, bng2_dtypes, bng2_opattrs,
                    kernel_name=kernel_name + "_step2", attrs=attrs)
    outputs = [np.full(s, np.nan, "float32") for s in out_shapes]
    expects = get_expect(*inputs, *bng2_opattrs)
    outputs = list(utils.mod_launch(mod, (*inputs, *outputs), outputs=tuple(range(-len(outputs), 0)),
                                    expect=expects))
    rtol, atol = get_rtol_atol("fused_batch_norm_grad", dtype)
    results = list(map(lambda x, y: np.allclose(x, y, rtol=rtol, atol=atol), outputs, expects))
    print("results", results)
    return inputs, outputs, expects, all(results)
Example #20
def broadcast_to_run(x_shape, x_dtype, shape, attrs):
    shapes = [x_shape]
    dtypes = [x_dtype]
    op_attrs = [shape]
    op_name = "broadcast_to"
    mod = utils.op_build_test(broadcast_to,
                              shapes,
                              dtypes,
                              op_attrs=op_attrs,
                              kernel_name=op_name,
                              attrs=attrs)
    bench_mark, inputs, output = gen_data(dtypes, shapes, shape)
    output = utils.mod_launch(mod, inputs + [output], expect=bench_mark)
    rtol, atol = get_rtol_atol(op_name, x_dtype)
    compare_res = compare_tensor(output, bench_mark, rtol=rtol, atol=atol)
    return inputs, output, bench_mark, compare_res
Example #21
def xlogy_grad_run(shape1, shape2, dtype, attrs):
    _, _, grad_shape = produce_shapes(shape1, shape2)
    mod = utils.op_build_test(xlogy_grad.xlogy_grad,
                              [shape1, shape2, grad_shape],
                              [dtype, dtype, dtype],
                              kernel_name="xlogy_grad", attrs=attrs)
    expects, inputs, outputs = gen_data(shape1, shape2, dtype)
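    # xlogy_grad produces two gradients (w.r.t. x and y), so the last two
    # launch arguments are both output buffers: outputs=(-2, -1).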
    reses = utils.mod_launch(
        mod, (*inputs, *outputs), expect=expects,
        outputs=(-2, -1))

    rtol, atol = get_rtol_atol("xlogy_grad", dtype)
    TestCase_Results = list(map(lambda x, y: compare_tensor(
        x, y, rtol=rtol, atol=atol, equal_nan=True), reses, expects))

    return inputs, reses, expects, all(TestCase_Results)
Example #22
def unsorted_segment_sum_run_others(data_shape,
                                    data_type,
                                    indices_shape,
                                    indices_type,
                                    num,
                                    attrs=None):
    mod = unsortedsegmentsum_compile(data_shape,
                                     indices_shape,
                                     num,
                                     data_type,
                                     attrs,
                                     kernel_name='unsortedsegmentsum_run',
                                     tuning=False)
    # gen data
    input1, input2, expect = gen_data(data_shape, data_type, indices_shape,
                                      indices_type, num)
    output_shape = expect.shape

    if len(expect.shape) == 0:
        output_shape = (1, )
    #output = np.full(output_shape, np.nan, expect.dtype)
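    # zero-init the output: segments that receive no contributing indices
    # should read 0, so a NaN fill would break the comparison against expect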
    output = np.zeros(output_shape, expect.dtype)
    output = utils.mod_launch(mod, (input1, input2, output), expect=expect)

    rtol, atol = get_rtol_atol("unsorted_segment_sum", data_type)
    res = compare_tensor(output, expect, rtol=rtol, atol=atol)
    print("Test {}".format("Pass" if res else "Failed"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    if attrs["profiling"]:
        input1, input2, output = to_tvm_nd_array([input1, input2, output],
                                                 akg.tvm.context(
                                                     target_name, 0))
        target_profiling(mod,
                         input1,
                         input2,
                         output,
                         target=target_name,
                         repeat_time=attrs["repeat_times"])
    return (input1, input2), output, expect, res
Example #23
def five2four_execute(shape4d, out_dtype, data_format, dtype, attrs):
    # Generate data
    op_attrs = [shape4d, out_dtype, data_format]
    if attrs is None:
        attrs = {}
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        input_data, bench_mark = gen_data(shape4d, dtype, out_dtype, data_format)
        shape_5d = input_data.shape
        mod = five2four_compile(shape_5d,
                                dtype,
                                op_attrs,
                                attrs,
                                kernel_name=kernel_name,
                                tuning=t)
        if t:
            output = np.full(shape4d, np.nan, out_dtype)
            return mod, bench_mark, (input_data, output)
        else:
            return mod
    else:
        input_data, bench_mark = gen_data(shape4d, dtype, out_dtype, data_format)
        # mod launch
        shape_5d = input_data.shape
        mod = five2four_compile(shape_5d, dtype, op_attrs, attrs)

        output = np.full(shape4d, np.nan, out_dtype)
        args = [input_data, output]
        # if attrs.get("dynamic"):
        #     for i in range(len(shape4d) - 1, -1, -1):
        #         args.append(shape4d[i])
        if attrs.get("dynamic"):
            args.append(shape_5d[0])
            args.append(shape_5d[1])
            args.append(shape_5d[4])
            block_dim = compute_blockdim(shape4d)
            args.append(block_dim)
        output = utils.mod_launch(mod, args, outputs=(1, ), expect=bench_mark)
        # compare result
        rtol, atol = get_rtol_atol("five2four", dtype)
        compare_result = compare_tensor(output,
                                        bench_mark,
                                        rtol=rtol,
                                        atol=atol,
                                        equal_nan=True)
        return input_data, output, bench_mark, compare_result
Example #24
def reduce_max_run(shape,
                   dtype,
                   axis,
                   keepdims,
                   kernel_name="reduce_max",
                   attrs=None):
    """run function for dsl function reduce_max"""
    if attrs is None:
        attrs = {}

    op_attrs = [axis, keepdims]

    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(reduce_max, [shape], [dtype],
                                  op_attrs=op_attrs,
                                  kernel_name=kernel_name,
                                  attrs=attrs,
                                  tuning=t)
        if t:
            expect, inputs, output = gen_data(axis, dtype, keepdims, shape)
            return mod, expect, (inputs, output)

        return mod

    mod = utils.op_build_test(reduce_max, [shape], [dtype],
                              op_attrs=op_attrs,
                              kernel_name=kernel_name,
                              attrs=attrs)
    expect, inputs, output = gen_data(axis, dtype, keepdims, shape)
    output = utils.mod_launch(mod, (inputs, output), expect=expect)
    rtol, atol = get_rtol_atol("reduce_max", dtype)
    if attrs.get("profiling", False):
        import akg
        target_name = attrs["target"].split()[0]
        args_list = to_tvm_nd_array([inputs, output],
                                    akg.tvm.context(target_name, 0))
        target_profiling(mod,
                         *args_list,
                         target=target_name,
                         repeat_time=attrs["repeat_times"])
    return inputs, output, expect, compare_tensor(output,
                                                  expect,
                                                  rtol=rtol,
                                                  atol=atol,
                                                  equal_nan=True)
Example #25
def gather_run(shape1,
               dtype1,
               shape2,
               dtype2,
               axis,
               poly_sch=True,
               attrs=None):
    if not attrs:
        attrs = {"target": "cuda"}
    op_attrs = [axis]
    mod = utils.op_build_test(gather, [shape1, shape2], [dtype1, dtype2],
                              op_attrs=op_attrs,
                              polyhedral=poly_sch,
                              attrs=attrs,
                              kernel_name="gather")

    # gen data
    params, indices, expect = gen_data(shape1, dtype1, shape2, dtype2, axis)
    output_shape = expect.shape

    if len(expect.shape) == 0:
        output_shape = (1, )
    output = np.zeros(output_shape, expect.dtype)
    output = utils.mod_launch(mod, (params, indices, output), expect=expect)
    rtol, atol = get_rtol_atol("gather", dtype1)
    res = compare_tensor(output, expect, rtol=rtol, atol=atol)
    print("Test {}".format("Pass" if res else "Failed"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    if attrs["profiling"]:
        params, indices, output = to_tvm_nd_array([params, indices, output],
                                                  akg.tvm.context(
                                                      target_name, 0))
        target_profiling(mod,
                         params,
                         indices,
                         output,
                         target=target_name,
                         repeat_time=attrs["repeat_times"])
    return (params, indices), output, expect, res
Example #26
def fake_quant_with_min_max_vars_per_channel_gradient_run(
        shape_gradient,
        shape_input,
        shape_min,
        shape_max,
        dtype,
        num_bits=8,
        narrow_range=False,
        attrs=None):
    """fake_quant_with_min_max_vars_per_channel_gradient_run"""
    mod = utils.op_build_test(
        fake_quant_with_min_max_vars_per_channel_gradient.
        fake_quant_with_min_max_vars_per_channel_gradient,
        [shape_gradient, shape_input, shape_min, shape_max],
        [dtype, dtype, dtype, dtype], [num_bits, narrow_range],
        kernel_name='fake_quant_with_min_max_vars_per_channel_gradient',
        attrs=attrs)
    args, exp_output, input_gradient, input_data, input_min, input_max = gen_data(
        shape_gradient, shape_input, shape_min, shape_max, dtype, num_bits,
        narrow_range)
    acu_output = utils.mod_launch(mod,
                                  args,
                                  expect=exp_output,
                                  outputs=(-3, -2, -1))
    # compare result
    rtol, atol = get_rtol_atol(
        "fake_quant_with_min_max_vars_per_channel_gradient", dtype)
    testcase_result = list(
        map(lambda x, y: compare_tensor(x, y, rtol=rtol, atol=atol, equal_nan=True),
            acu_output, exp_output))
    return [input_gradient, input_data, input_min,
            input_max], acu_output, exp_output, all(testcase_result)
Example #27
def square_execute(shape, dtype, kernel_name, attrs):
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = square_compile(shape, dtype, kernel_name, attrs, tuning=t)
        if t:
            args, exp_output, input_data = method_name(dtype, shape)
            return mod, exp_output, args
        else:
            return mod
    else:
        mod = square_compile(shape, dtype, kernel_name, attrs)
        args, exp_output, input_data = method_name(dtype, shape)
        acu_output = utils.mod_launch(mod, args, expect=exp_output)
        rtol, atol = get_rtol_atol("square", dtype)
        testcase_result = compare_tensor(acu_output, exp_output, rtol=rtol, atol=atol, equal_nan=True)
        return input_data, acu_output, exp_output, testcase_result
Example #28
def selu_run(shape, dtype, attrs):
    """selu_run implementation"""
    mod = utils.op_build_test(selu.selu, [shape], [dtype],
                              kernel_name='selu',
                              op_attrs=[],
                              attrs=attrs)
    args, exp_output, input_data = gen_data(dtype, shape)
    acu_output = utils.mod_launch(mod, args, expect=exp_output)
    # compare result
    rtol, atol = get_rtol_atol("selu", dtype)
    testcase_result = compare_tensor(acu_output,
                                     exp_output,
                                     rtol=rtol,
                                     atol=atol,
                                     equal_nan=True)

    return input_data, acu_output, exp_output, testcase_result
Example #29
def unpack_run(shape, dtype, tensor_format, num, axis, attrs):
    """run function for unpack"""
    mod = utils.op_build_test(unpack.unpack, [shape], [dtype],
                              op_attrs=[tensor_format, num, axis],
                              kernel_name="unpack",
                              attrs=attrs)
    data, expects, out_bufs = gen_data(shape, dtype, axis)
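    # unpack returns several tensors; every element of out_bufs is an output,
    # so outputs indexes the last len(out_bufs) launch arguments.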
    outputs = utils.mod_launch(mod, (data, *out_bufs),
                               expect=expects,
                               outputs=list(range(-len(out_bufs), 0)))
    rtol, atol = get_rtol_atol("unpack", dtype)

    cmp_res = list(
        map(lambda x, y: compare_tensor(x, y, rtol=rtol, atol=atol), outputs,
            expects))

    return data, outputs, expects, all(cmp_res)
Example #30
def bn_2_run(shape, dtype, momentum, eps, kernel_name, attrs):
    """Test run function for second part of splited bn"""
    in_shapes, in_dtypes = get_compile_param(shape, dtype, 2)

    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(fused_bn2,
                                  in_shapes, in_dtypes,
                                  op_attrs=[momentum],
                                  kernel_name=kernel_name,
                                  attrs=attrs, tuning=t)
        if t:
            inputs, output_buffers, expects = gen_data(shape, dtype, momentum, eps, 2)
            inplace_binds = ((2, 1), (3, 2))
            output_places2 = list(range(-len(output_buffers), 0))
            if inplace_binds is not None:
                for bind in inplace_binds:
                    output_places2[bind[1]] = bind[0]
            return mod, expects, {
                "args": (*inputs, *output_buffers),
                'outputs': output_places2,
                'tuning': False}
        return mod

    mod_2 = utils.op_build_test(fused_bn2,
                                in_shapes, in_dtypes,
                                op_attrs=[momentum],
                                kernel_name="fusedbn2_"+kernel_name,
                                attrs=attrs)

    inputs, output_buffers, expects = gen_data(shape, dtype, momentum, eps, 2)
    inplace_binds = ((2, 1), (3, 2))
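    # inplace_binds maps launch-argument slots onto output positions: returned
    # output 1 is read back from args[2] and output 2 from args[3], i.e. those
    # input buffers are updated in place.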
    output_places2 = list(range(-len(output_buffers), 0))
    if inplace_binds is not None:
        for bind in inplace_binds:
            output_places2[bind[1]] = bind[0]
    res_2 = utils.mod_launch(mod_2, [*inputs, *output_buffers],
                             outputs=output_places2, expect=expects)

    rtol, atol = get_rtol_atol("bn_split", dtype)
    cmp_res = list(map(lambda x, y:
                       compare_tensor(x, y, rtol=rtol, atol=atol),
                       res_2, expects))
    return inputs, res_2, expects, all(cmp_res)