def conv_backprop_filter_run(fmap_shape, filter_shape, pad_, stride_, dilation_, attrs=None):
    """Run the conv_backprop_filter kernel and compare it with generated reference data.

    Args:
        fmap_shape: 4-element feature-map shape, unpacked as (n, c, h, w).
        filter_shape: 4-element filter shape, unpacked as (cout, cin, h, w).
        pad_: 4-element padding, unpacked as (top, bottom, left, right).
        stride_: 2-element stride, unpacked as (h, w).
        dilation_: forwarded unchanged to the name generator, data generator and build.
        attrs: forwarded unchanged to the name generator, data generator and build.

    Returns:
        Tuple ``(inputs, output, expect, result)``. When the environment variable
        ``WRITE_TO_DISK`` equals ``"Yes"`` the kernel is not built and ``result``
        is ``True``.
    """
    block = 16
    dtype_name = 'float16'

    def _align(value):
        # Round up to the next multiple of the block size.
        return (value + block - 1) // block * block

    batch, channels, height, width = fmap_shape
    filters, _, k_h, k_w = filter_shape
    channels = _align(channels)
    filters = _align(filters)

    top, bottom, left, right = pad_
    step_h, step_w = stride_
    res_h = (height + top + bottom - k_h) // step_h + 1
    res_w = (width + left + right - k_w) // step_w + 1

    x_shape = (batch, channels, height, width)
    w_shape = (filters, channels, k_h, k_w)
    dx_5d = (batch, channels // block, height, width, block)
    dy_5d = (batch, filters // block, res_h, res_w, block)
    build_shapes = [dy_5d, dx_5d]

    data_dir = os.environ.get("RANDOM_DATA_DISK_PATH", "")
    kernel_id = gen_kernel_name([build_shapes], [dtype_name],
                                op_attrs=[fmap_shape, filter_shape, pad_, stride_, dilation_],
                                kernel_name='conv_backprop_filter', attrs=attrs)
    expect_file = data_dir + "/" + kernel_id + ".bin"

    print("gen_data begin.")
    dy_data, dx_data, expect = gen_data(x_shape, w_shape, pad_, stride_, dilation_, expect_file, attrs=attrs)
    assert (dy_data.shape == dy_5d)
    print("gen_data finished.")

    result_buf = np.full(expect.shape, 0, 'float32')
    np_inputs = (dy_data, dx_data)

    # In write-to-disk mode only the data is produced; nothing is built or launched.
    if os.environ.get("WRITE_TO_DISK", "No") == "Yes":
        return np_inputs, result_buf, expect, True

    mod = utils.op_build_test(conv_backprop_filter, [build_shapes], [dtype_name],
                              op_attrs=[fmap_shape, filter_shape, pad_, stride_, dilation_],
                              kernel_name='conv_backprop_filter', attrs=attrs)
    result_buf = utils.mod_launch(mod, (dy_data, dx_data, result_buf), expect=expect)
    rtol, atol = get_rtol_atol("conv_backprop_filter", dtype_name)
    verdict = compare_tensor(result_buf, expect, rtol=rtol, atol=atol, equal_nan=True)
    return np_inputs, result_buf, expect, verdict
def rint_run(shape, dtype, attrs=None):
    """Build and launch the rint kernel, then compare it with the expected output.

    Returns:
        Tuple ``(input, output, expect, result)``.
    """
    attrs = {} if attrs is None else attrs
    kernel = utils.op_build_test(rint.rint, [shape], [dtype], kernel_name='rint', attrs=attrs)
    launch_args, reference, source = gen_data(shape, dtype)
    actual = utils.mod_launch(kernel, launch_args, expect=reference)

    rtol, atol = get_rtol_atol("rint", dtype)
    passed = compare_tensor(actual, reference, rtol=rtol, atol=atol, equal_nan=True)
    return source, actual, reference, passed
def laplacian_of_gaussian_ad_run(shape, dtype, attrs):
    """Build and launch laplacian_of_gaussian_ad and compare it with the reference.

    Returns:
        Tuple ``((head, input), output, expect, result)``.
    """
    reference, head, data = gen_data(dtype, shape)
    kernel = utils.op_build_test(laplacian_of_gaussian_ad, [head.shape, shape], [dtype, dtype],
                                 kernel_name='mulexp', attrs=attrs)
    # The output buffer starts as NaN before the launch.
    out_buf = np.full(reference.shape, np.nan, dtype)
    actual = utils.mod_launch(kernel, (head, data, out_buf), expect=reference)
    rtol, atol = get_rtol_atol("laplacian_of_gaussian", dtype)
    passed = compare_tensor(actual, reference, rtol=rtol, atol=atol)
    return (head, data), actual, reference, passed
def bessel_i1e_run(x_shape, x_dtype, attrs):
    """Build and launch the bessel_i1e kernel and compare it with the benchmark.

    Returns:
        Tuple ``(inputs, output, bench_mark, result)``.
    """
    shape_list, dtype_list = [x_shape], [x_dtype]
    kernel = utils.op_build_test(bessel_i1e, shape_list, dtype_list, kernel_name="bessel_i1e", attrs=attrs)
    reference, tensors, out_buf = gen_data(dtype_list, shape_list)
    actual = utils.mod_launch(kernel, tensors + [out_buf], expect=reference)
    rtol, atol = get_rtol_atol("bessel_i1e", dtype_list[0])
    passed = compare_tensor(actual, reference, rtol=rtol, atol=atol)
    return tensors, actual, reference, passed
def conv_filter_ad_run(fmap_shape, filter_shape, pad_, stride_, dilation_, attrs=None):
    """Run the ConvFilterAd kernel and compare it with generated reference data.

    Args:
        fmap_shape: 4-element feature-map shape, unpacked as (n, c, h, w).
        filter_shape: 4-element filter shape, unpacked as (cout, cin, h, w);
            ``cin`` must equal the feature-map channel count.
        pad_: 4-element padding, unpacked as (top, bottom, left, right).
        stride_: 2-element stride, unpacked as (h, w).
        dilation_: forwarded unchanged to the name generator, data generator and build.
        attrs: forwarded unchanged to the name generator, data generator and build.

    Returns:
        Tuple ``(inputs, output, expect, result)``. When the environment variable
        ``WRITE_TO_DISK`` equals ``"Yes"`` the kernel is not built and ``result``
        is ``True``.
    """
    block = 16
    dtype_name = 'float16'

    def _align(value):
        # Round up to the next multiple of the block size.
        return (value + block - 1) // block * block

    batch, channels, height, width = fmap_shape
    filters, filter_channels, k_h, k_w = filter_shape
    assert(channels == filter_channels)
    channels = _align(channels)
    filters = _align(filters)

    top, bottom, left, right = pad_
    step_h, step_w = stride_
    res_h = (height + top + bottom - k_h) // step_h + 1
    res_w = (width + left + right - k_w) // step_w + 1

    x_shape = (batch, channels, height, width)
    w_shape = (filters, channels, k_h, k_w)
    x_5d = (batch, channels // block, height, width, block)
    y_5d = (batch, filters // block, res_h, res_w, block)
    dw_shapes = [y_5d, x_5d]

    data_dir = os.environ.get("RANDOM_DATA_DISK_PATH", "")
    kernel_id = gen_kernel_name([dw_shapes], [dtype_name],
                                op_attrs=[fmap_shape, filter_shape, pad_, stride_, dilation_],
                                kernel_name='conv_filter_ad', attrs=attrs)
    expect_file = data_dir + "/" + kernel_id + ".bin"

    print("gen_data begin.")
    dy_data, dx_data, expect = gen_data_dw(x_shape, w_shape, pad_, stride_, dilation_, expect_file, attrs=attrs)
    print("gen_data finished.")

    result_buf = np.full(expect.shape, 0, 'float32')
    np_input = (dy_data, dx_data)

    # In write-to-disk mode only the data is produced; nothing is built or launched.
    if os.environ.get("WRITE_TO_DISK", "No") == "Yes":
        return np_input, result_buf, expect, True

    mod = utils.op_build_test(ConvFilterAd, [dw_shapes], [dtype_name],
                              op_attrs=[fmap_shape, filter_shape, pad_, stride_, dilation_],
                              kernel_name='conv_filter_ad', attrs=attrs, dump_code=True)
    result_buf = utils.mod_launch(mod, (dy_data, dx_data, result_buf), expect=expect)
    rtol, atol = get_rtol_atol("conv_filter_ad", dtype_name)
    verdict = compare_tensor(result_buf, expect, rtol=rtol, atol=atol, equal_nan=True)
    return np_input, result_buf, expect, verdict
def batchmatmul_execute(bs, m, n, k, bias_shape, dtype, trans_a, trans_b, kernel_name, attrs):
    """Compile and run batchmatmul, or only compile it when auto-tuning is requested.

    When ``attrs`` contains the key ``'tuning'`` the kernel name is taken from
    ``attrs`` and nothing is launched: the result is ``mod`` alone if the
    tuning value is falsy, otherwise ``(mod, expect, launch_args)``.

    Otherwise returns ``((m_a, m_b), output, expect, result)``.
    """
    _, _, out_shape = get_shape(bs, m, n, k, trans_a, trans_b)

    # Auto-tuning path: build only, optionally handing back data for the tuner.
    if 'tuning' in attrs:
        tuning = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = batchmatmul_compile(bs, m, n, k, bias_shape, dtype, trans_a, trans_b, kernel_name, attrs, tuning)
        if not tuning:
            return mod
        m_a, m_b, matrix_bias, expect = gen_data(bs, m, n, k, bias_shape, trans_a, trans_b, dtype)
        output = np.full(out_shape, np.nan, dtype=dtype)
        if len(bias_shape) > 0:
            tuner_args = (m_a, m_b, matrix_bias, output)
        else:
            tuner_args = (m_a, m_b, output)
        return mod, expect, tuner_args

    mod, args = batchmatmul_compile(bs, m, n, k, bias_shape, dtype, trans_a, trans_b, kernel_name, attrs)
    m_a, m_b, matrix_bias, expect = gen_data(bs, m, n, k, bias_shape, trans_a, trans_b, dtype)
    output = np.full(out_shape, np.nan, dtype=dtype)

    # The output buffer is last in the argument list; its index moves with the bias.
    if len(bias_shape) > 0:
        launch_args, outputs = [m_a, m_b, matrix_bias, output], [3, ]
    else:
        launch_args, outputs = [m_a, m_b, output], [2, ]

    # NOTE(review): the block-dim argument is treated as part of the dynamic
    # path; the source indentation was lost, so confirm against the upstream file.
    if attrs.get("dynamic"):
        launch_args = launch_args + args
        launch_args.append(compute_blockdim(bs, (m, n)))

    output = utils.mod_launch(mod, launch_args, outputs=outputs, expect=expect)
    rtol, atol = get_rtol_atol("batchmatmul", dtype)
    print("-----------------")
    print(output.dtype)
    print(expect.dtype)
    res = compare_tensor(output, expect, rtol=rtol, atol=atol, equal_nan=True)
    return (m_a, m_b), output, expect, res
def matmul_execute(shape_x, shape_y, bias, left_format, right_format, out_format, adj_x, adj_y, dtype, out_dtype,
                   kernel_name, attrs):
    '''
    Compile the dynamic matmul kernel, launch it and compare with the benchmark.

    There are four types of fractal format in Davinci core: zZ, zN, nZ, nN
    general matmul format
    left_trans: False right_trans False: zZ * nZ = zN
    left_trans: True  right_trans False: nN * nZ = zN
    left_trans: False right_trans True : zZ * zN = zN
    left_trans: True  right_trans True : nN * zN = zN

    Now we need to support: zN * nZ = zN
    use left_format to specify, left matrix data format
    use right_format to specify, right matrix data format

    Returns a tuple ((m_x, m_y), output, bench_mark, compare_result).
    '''
    def _round16(value):
        # Round a dimension up to the next multiple of 16.
        return (value + 15) // 16 * 16

    batch_tuple, m, k, n = extract_dim(shape_x, shape_y, adj_x, adj_y)
    m, n, k = _round16(m), _round16(n), _round16(k)

    _, _, _, out_shape, k = get_converted_shapes(
        m, n, k, batch_tuple, adj_x, adj_y, bias, left_format, right_format, out_format)
    mod = dynamic_matmul_compile(shape_x, shape_y, bias, left_format, right_format, out_format,
                                 adj_x, adj_y, dtype, out_dtype, kernel_name, attrs)

    m_x, m_y, bench_mark, bias_data = matmul_data(
        batch_tuple, m, k, n, dtype, out_dtype, bias, adj_x, adj_y, left_format, right_format, out_format)

    # For any bias value other than 0 or 1 nothing is launched and the
    # NaN-filled buffer is what gets compared.
    output = np.full(out_shape, np.nan, out_dtype)
    if bias == 0:
        launch_args = (m_x, m_y, output) + (1,) * 9
        output = utils.mod_launch(mod, launch_args, outputs=(2, ), expect=bench_mark)
    elif bias == 1:
        output = utils.mod_launch(mod, (m_x, m_y, bias_data, output), expect=bench_mark)

    rtol, atol = get_rtol_atol("matmul", dtype)
    compare_result = compare_tensor(output, bench_mark, rtol=rtol, atol=atol, equal_nan=True)
    return (m_x, m_y), output, bench_mark, compare_result
def acosh_grad_run(shape, dtype, attrs):
    """Build and launch acosh_grad on two same-shaped inputs; compare with the benchmark.

    Returns:
        Tuple ``(inputs, output, bench_mark, result)``.
    """
    kernel = utils.op_build_test(acosh_grad, [shape, shape], [dtype, dtype],
                                 kernel_name="acosh_grad", attrs=attrs)
    reference, tensors, out_buf = gen_data(dtype, shape)
    actual = utils.mod_launch(kernel, tensors + [out_buf], expect=reference)
    rtol, atol = get_rtol_atol("acosh_grad", dtype)
    passed = compare_tensor(actual, reference, rtol=rtol, atol=atol)
    return tensors, actual, reference, passed
def sinh_run(shape, dtype, attrs=None):
    """Build and launch the Sinh kernel and compare it with the expected output.

    NaNs are not treated as equal in the comparison.

    Returns:
        Tuple ``(inputs, output, expect, result)``.
    """
    kernel = utils.op_build_test(Sinh, [shape], [dtype], kernel_name="sinh", attrs=attrs)
    reference, data, out_buf = gen_data(dtype, shape)
    actual = utils.mod_launch(kernel, (data, out_buf), expect=reference)
    rtol, atol = get_rtol_atol("sinh", dtype)
    passed = compare_tensor(actual, reference, rtol=rtol, atol=atol, equal_nan=False)
    return data, actual, reference, passed
def div_no_nan_execute(shapes, dtype, attrs):
    """Generate data, compile div_no_nan, launch it and compare with the expectation.

    Returns:
        Tuple ``(inputs, output, expect, result)``.
    """
    reference, tensors, launch_args = gen_data(dtype, shapes)
    kernel = div_no_nan_compile(shapes, dtype, attrs)
    actual = utils.mod_launch(kernel, launch_args, expect=reference)

    rtol, atol = get_rtol_atol("div_no_nan", dtype)
    passed = compare_tensor(actual, reference, rtol=rtol, atol=atol, equal_nan=True)
    return tensors, actual, reference, passed
def truncate_div_run(shape1, dtype1, shape2, dtype2, attrs):
    """Build and launch truncate_div on two inputs and compare with the expectation.

    The tolerance is looked up using ``dtype1``.

    Returns:
        Tuple ``(inputs, output, expect, result)``.
    """
    reference, tensors, out_buf = gen_data(dtype1, dtype2, shape1, shape2)
    kernel = utils.op_build_test(truncate_div.truncate_div, [shape1, shape2], [dtype1, dtype2],
                                 kernel_name="truncate_div", attrs=attrs)
    actual = utils.mod_launch(kernel, (*tensors, out_buf), expect=reference)
    rtol, atol = get_rtol_atol("truncate_div", dtype1)
    passed = compare_tensor(actual, reference, rtol=rtol, atol=atol, equal_nan=True)
    return tensors, actual, reference, passed
def clear_zero_run(shape, dtype, attrs, kernel_name="clear_zero"):
    """Launch ClearZero on a NaN-filled buffer and compare the result with all zeros.

    ``attrs`` is accepted but not forwarded to the build.

    Returns:
        Tuple ``(data, output, expect, result)`` where ``data`` is the buffer
        that was handed to the kernel.
    """
    zeros = np.full(shape, 0, dtype)
    buffer = np.full(shape, np.nan, dtype)
    kernel = utils.op_build_test(ClearZero, [shape], [dtype], kernel_name=kernel_name)
    # The single argument is both input and output, hence output index -1.
    cleared = utils.mod_launch(kernel, (buffer, ), outputs=(-1, ), expect=zeros)
    rtol, atol = get_rtol_atol("clear_zero", dtype)
    passed = compare_tensor(cleared, zeros, rtol=rtol, atol=atol, equal_nan=True)
    return buffer, cleared, zeros, passed
def asinh_run(x_shape, x_dtype, attrs):
    """Build and launch the asinh kernel and compare it with the benchmark.

    Returns:
        Tuple ``(inputs, output, bench_mark, result)``.
    """
    kernel = utils.op_build_test(asinh, [x_shape], [x_dtype], kernel_name="asinh", attrs=attrs)
    reference, tensors, out_buf = gen_data(x_dtype, x_shape)
    actual = utils.mod_launch(kernel, tensors + [out_buf], expect=reference)
    rtol, atol = get_rtol_atol("asinh", x_dtype)
    passed = compare_tensor(actual, reference, rtol=rtol, atol=atol)
    return tensors, actual, reference, passed
def fused_bn_grad_5D_run_1(shape, dtype, kernel_name, attrs):
    """Test step 1 of the fused batch-norm gradient (bnGrad_1).

    When ``attrs`` contains the key ``'tuning'`` the kernel name is taken from
    ``attrs`` and nothing is launched: the result is ``mod`` alone if the
    tuning value is falsy, otherwise ``(mod, expects, launch_spec)``.

    Otherwise returns ``(inputs, outputs, expects, all_close)``.
    """
    def _reference(dy, data, mean):
        # float16 inputs are promoted to float32 before the reductions.
        if dy.dtype == "float16":
            dy = dy.astype("float32")
            data = data.astype("float32")
        centered = data - mean
        dgamma_hw = np.sum(dy * centered, axis=(2, 3), keepdims=True)
        dbeta_hw = np.sum(dy, axis=(2, 3), keepdims=True)
        return [dgamma_hw, dbeta_hw, centered]

    shape_nc1c0 = (shape[0], shape[1], 1, 1, shape[4])
    shape_c1c0 = (1, shape[1], 1, 1, shape[4])
    in_shapes = [shape, shape, shape_c1c0]
    in_dtypes = [dtype, dtype, "float32"]

    def _make_buffers():
        # Random inputs; the third one is replaced by the mean of the second.
        tensors = [np.random.rand(*shp).astype(typ) for shp, typ in zip(in_shapes, in_dtypes)]
        tensors[2] = np.mean(tensors[1], axis=(0, 2, 3), keepdims=True).astype(in_dtypes[2])
        buffers = [np.full(shp, np.nan, "float32") for shp in (shape_nc1c0, shape_nc1c0, shape)]
        return tensors, buffers

    if 'tuning' in attrs:
        tuning = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(fused_bn_grad1, in_shapes, in_dtypes,
                                  kernel_name=kernel_name + "_step1", attrs=attrs, tuning=tuning)
        if not tuning:
            return mod
        inputs, outputs = _make_buffers()
        expects = _reference(*inputs)
        launch_spec = {"args": (*inputs, *outputs),
                       'outputs': tuple(range(-len(outputs), 0)),
                       'tuning': False}
        return mod, expects, launch_spec

    mod = utils.op_build_test(fused_bn_grad1, in_shapes, in_dtypes,
                              kernel_name=kernel_name + "_step1", attrs=attrs)
    inputs, outputs = _make_buffers()
    out_places = tuple(range(-len(outputs), 0))
    outputs = list(utils.mod_launch(mod, (*inputs, *outputs), outputs=out_places,
                                    expect=_reference(*inputs)))
    # The reference is recomputed after the launch, as a separate call.
    expects = _reference(*inputs)
    rtol, atol = get_rtol_atol("fused_batch_norm_grad", dtype)
    results = [np.allclose(got, want, rtol=rtol, atol=atol) for got, want in zip(outputs, expects)]
    print("results", results)
    return inputs, outputs, expects, all(results)
def reduction_layer_execute(shape, dtype, axis, op, coeff, attrs):
    """Generate data, compile reduction_layer, launch it and compare with the expectation.

    Returns:
        Tuple ``(inputs, output, expect, result)``.
    """
    reference, tensors, launch_args = gen_data(shape, dtype, axis, op, coeff)
    kernel = reduction_layer_compile(shape, dtype, axis, op, coeff, attrs)
    actual = utils.mod_launch(kernel, launch_args, expect=reference)

    rtol, atol = get_rtol_atol("reduction_layer", dtype)
    passed = compare_tensor(actual, reference, rtol=rtol, atol=atol, equal_nan=True)
    return tensors, actual, reference, passed
def bitwise_and_run(shape1, dtype1, shape2, dtype2, kernel_name, attrs):
    """Build and launch bitwise_and on two inputs and compare with the expectation.

    The tolerance is looked up using ``dtype1``.

    Returns:
        Tuple ``(inputs, actual, expect, result)`` where ``inputs`` are the
        tensors produced by the data generator.
    """
    mod = utils.op_build_test(bitwise_and.bitwise_and, [shape1, shape2], [dtype1, dtype2],
                              kernel_name=kernel_name, attrs=attrs)
    expect, inputs, output = gen_data(shape1, shape2, dtype1, dtype2)
    actual = utils.mod_launch(mod, (*inputs, output), expect=expect)
    rtol, atol = get_rtol_atol("bitwise_and", dtype1)
    testcase_result = compare_tensor(actual, expect, rtol=rtol, atol=atol, equal_nan=True)
    # Hand back the generated tensors, not the builtin ``input`` function.
    return inputs, actual, expect, testcase_result
def leaky_relu_execute(shape, dtype, negative_slop, attrs):
    """Generate data, compile leaky_relu, launch it and compare with the expectation.

    Returns:
        Tuple ``(inputs, output, expect, result)``.
    """
    reference, tensors, launch_args = gen_data(dtype, shape, negative_slop)
    kernel = leaky_relu_compile(shape, dtype, negative_slop, attrs)
    actual = utils.mod_launch(kernel, launch_args, expect=reference)

    rtol, atol = get_rtol_atol("leaky_relu", dtype)
    passed = compare_tensor(actual, reference, rtol=rtol, atol=atol, equal_nan=True)
    return tensors, actual, reference, passed
def abs_run(shape, dtype, attrs=None):
    """Build and run the Abs kernel, or only build it when auto-tuning is requested.

    Args:
        shape: input shape passed to the build and the data generator.
        dtype: input dtype passed to the build and the data generator.
        attrs: optional attribute dict. ``None`` is treated as an empty dict.
            ``"target"`` and ``"repeat_times"`` are only read when
            ``"profiling"`` is truthy.

    Returns:
        With the key ``'tuning'`` in ``attrs``: ``mod`` alone if the tuning
        value is falsy, otherwise ``(mod, expect, (inputs, output))``.
        Otherwise ``(inputs, output, expect, result)``.
    """
    # A fresh dict per call avoids sharing one default dict between calls.
    if attrs is None:
        attrs = {}
    input_shape = [shape]
    input_dtype = [dtype]

    if 'tuning' in attrs:
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(Abs, input_shape, input_dtype, kernel_name=kernel_name, attrs=attrs, tuning=t)
        if not t:
            return mod
        exp_output, inputs, output = gen_date(dtype, shape)
        return mod, exp_output, (inputs, output)

    mod = utils.op_build_test(Abs, input_shape, input_dtype, kernel_name='abs', attrs=attrs)
    exp_output, inputs, output = gen_date(dtype, shape)
    acu_output = utils.mod_launch(mod, (inputs, output), expect=exp_output)

    rtol, atol = get_rtol_atol("abs", dtype)
    testcase_result = compare_tensor(acu_output, exp_output, rtol=rtol, atol=atol, equal_nan=True)

    # The target is only needed for profiling, so it is looked up only there;
    # a run without profiling must not fail on a missing "target" key.
    if attrs.get("profiling", False):
        target_name = attrs["target"].split()[0]
        data, output = to_tvm_nd_array([inputs, output], akg.tvm.context(target_name, 0))
        target_profiling(mod, data, output, target=target_name, repeat_time=attrs["repeat_times"])
    return inputs, acu_output, exp_output, testcase_result
def fused_bn_grad_5D_run_2(shape, dtype, eps, kernel_name, attrs):
    """Test step 2 of the fused batch-norm gradient (bnGrad_2).

    When ``attrs`` contains the key ``'tuning'`` the kernel name is taken from
    ``attrs`` and nothing is launched: the result is ``mod`` alone if the
    tuning value is falsy, otherwise ``(mod, expects, launch_spec)``.

    Otherwise returns ``(inputs, outputs, expects, all_close)``.
    """
    def _reference(dgamma_red_hw, dbeta_red_hw, var, gamma, eps, data_shape):
        count = data_shape[0] * data_shape[2] * data_shape[3]
        neg_m_rec = -1.0 / count
        # Scalars become 5-D arrays of var's dtype so they broadcast.
        eps = np.array([eps], dtype=var.dtype).reshape([1] * 5)
        neg_m_rec = np.array([neg_m_rec], dtype=var.dtype).reshape([1] * 5)
        inv_std = (1.0 / np.sqrt(var + eps)).astype(var.dtype)
        dgamma = inv_std * np.sum(dgamma_red_hw, axis=0, keepdims=True)
        dbeta = np.sum(dbeta_red_hw, axis=0, keepdims=True)
        scale = gamma * inv_std
        dgamma_dx = neg_m_rec * scale * inv_std * dgamma
        dbeta_dx = neg_m_rec * scale * dbeta
        return [dgamma, dbeta, scale, dgamma_dx, dbeta_dx]

    shape_nc1c0 = (shape[0], shape[1], 1, 1, shape[4])
    shape_c1c0 = (1, shape[1], 1, 1, shape[4])
    in_shapes = [shape_nc1c0, shape_nc1c0, shape_c1c0, shape_c1c0]
    in_dtypes = ["float32"] * len(in_shapes)
    op_attrs = [eps, shape]

    # Inputs are drawn before the tuning check, so both paths use random data.
    inputs = [np.random.rand(*shp).astype(typ) for shp, typ in zip(in_shapes, in_dtypes)]
    out_shapes = [shape_c1c0] * 5

    def _nan_buffers():
        return [np.full(shp, np.nan, "float32") for shp in out_shapes]

    if 'tuning' in attrs:
        tuning = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(fused_bn_grad2, in_shapes, in_dtypes, op_attrs,
                                  kernel_name=kernel_name + "_step2", attrs=attrs, tuning=tuning)
        if not tuning:
            return mod
        outputs = _nan_buffers()
        expects = _reference(*inputs, *op_attrs)
        launch_spec = {"args": (*inputs, *outputs),
                       'outputs': tuple(range(-len(outputs), 0)),
                       'tuning': False}
        return mod, expects, launch_spec

    mod = utils.op_build_test(fused_bn_grad2, in_shapes, in_dtypes, op_attrs,
                              kernel_name=kernel_name + "_step2", attrs=attrs)
    outputs = _nan_buffers()
    out_places = tuple(range(-len(outputs), 0))
    outputs = list(utils.mod_launch(mod, (*inputs, *outputs), outputs=out_places,
                                    expect=_reference(*inputs, *op_attrs)))
    # The reference is recomputed after the launch, as a separate call.
    expects = _reference(*inputs, *op_attrs)
    rtol, atol = get_rtol_atol("fused_batch_norm_grad", dtype)
    results = [np.allclose(got, want, rtol=rtol, atol=atol) for got, want in zip(outputs, expects)]
    print("results", results)
    return inputs, outputs, expects, all(results)
def broadcast_to_run(x_shape, x_dtype, shape, attrs):
    """Build and launch broadcast_to for the target ``shape``; compare with the benchmark.

    Returns:
        Tuple ``(inputs, output, bench_mark, result)``.
    """
    name = "broadcast_to"
    shape_list, dtype_list = [x_shape], [x_dtype]
    kernel = utils.op_build_test(broadcast_to, shape_list, dtype_list, op_attrs=[shape],
                                 kernel_name=name, attrs=attrs)
    reference, tensors, out_buf = gen_data(dtype_list, shape_list, shape)
    actual = utils.mod_launch(kernel, tensors + [out_buf], expect=reference)
    rtol, atol = get_rtol_atol(name, x_dtype)
    passed = compare_tensor(actual, reference, rtol=rtol, atol=atol)
    return tensors, actual, reference, passed
def xlogy_grad_run(shape1, shape2, dtype, attrs):
    """Build and launch xlogy_grad and compare both outputs with the expectations.

    Returns:
        Tuple ``(inputs, results, expects, all_passed)``.
    """
    grad_shape = produce_shapes(shape1, shape2)[2]
    kernel = utils.op_build_test(xlogy_grad.xlogy_grad, [shape1, shape2, grad_shape],
                                 [dtype, dtype, dtype], kernel_name="xlogy_grad", attrs=attrs)
    expects, inputs, out_bufs = gen_data(shape1, shape2, dtype)
    # The last two launch arguments are the kernel outputs.
    reses = utils.mod_launch(kernel, (*inputs, *out_bufs), expect=expects, outputs=(-2, -1))
    rtol, atol = get_rtol_atol("xlogy_grad", dtype)
    verdicts = [compare_tensor(got, want, rtol=rtol, atol=atol, equal_nan=True)
                for got, want in zip(reses, expects)]
    return inputs, reses, expects, all(verdicts)
def unsorted_segment_sum_run_others(data_shape, data_type, indices_shape, indices_type, num, attrs=None):
    """Compile, run and check unsorted_segment_sum, optionally profiling it.

    Args:
        data_shape, data_type: shape and dtype of the data input.
        indices_shape, indices_type: shape and dtype of the indices input.
        num: forwarded to the compile step and the data generator.
        attrs: optional attribute dict, forwarded unchanged to the compile
            step. ``"target"`` is read for the failure dump and for profiling;
            ``"profiling"`` defaults to off; ``"repeat_times"`` is read only
            when profiling.

    Returns:
        Tuple ``((input1, input2), output, expect, res)``.

    Raises:
        AssertionError: if the output does not match the expectation.
    """
    mod = unsortedsegmentsum_compile(data_shape, indices_shape, num, data_type, attrs,
                                     kernel_name='unsortedsegmentsum_run', tuning=False)
    input1, input2, expect = gen_data(data_shape, data_type, indices_shape, indices_type, num)

    # A 0-d expectation still needs a one-element output buffer.
    output_shape = expect.shape
    if len(expect.shape) == 0:
        output_shape = (1, )
    output = np.zeros(output_shape, expect.dtype)
    output = utils.mod_launch(mod, (input1, input2, output), expect=expect)

    # get_rtol_atol yields (rtol, atol), in that order.
    rtol, atol = get_rtol_atol("unsorted_segment_sum", data_type)
    res = compare_tensor(output, expect, rtol=rtol, atol=atol)
    print("Test {}".format("Pass" if res else "Failed"))

    # Local lookups tolerate attrs=None; the compile step above still
    # receives attrs exactly as passed.
    options = attrs if attrs is not None else {}
    target = options.get("target")

    if not res:
        # Dump the kernel source when the target is known; a missing target
        # must not mask the test failure itself.
        if target is not None:
            target_name = target.split()[0]
            mod_source = mod if target_name == "llvm" else mod.imported_modules[0]
            print("Error {}:========================".format(target_name))
            print(mod_source.get_source())
        raise AssertionError("Test fail")

    if options.get("profiling", False):
        target_name = options["target"].split()[0]
        input1, input2, output = to_tvm_nd_array([input1, input2, output],
                                                 akg.tvm.context(target_name, 0))
        target_profiling(mod, input1, input2, output, target=target_name,
                         repeat_time=options["repeat_times"])
    return (input1, input2), output, expect, res
def five2four_execute(shape4d, out_dtype, format, dtype, attrs):
    """Compile and run five2four, or only compile it when auto-tuning is requested.

    When ``attrs`` contains the key ``'tuning'`` the kernel name is taken from
    ``attrs`` and nothing is launched: the result is ``mod`` alone if the
    tuning value is falsy, otherwise ``(mod, bench_mark, (input, output))``.

    Otherwise returns ``(input, output, bench_mark, compare_result)``.
    """
    op_attrs = [shape4d, out_dtype, format]
    attrs = {} if attrs is None else attrs

    # Both paths generate the same data and compile against its shape.
    data_5d, bench_mark = gen_data(shape4d, dtype, out_dtype, format)
    shape_5d = data_5d.shape

    if 'tuning' in attrs:
        tuning = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = five2four_compile(shape_5d, dtype, op_attrs, attrs, kernel_name=kernel_name, tuning=tuning)
        if not tuning:
            return mod
        return mod, bench_mark, (data_5d, np.full(shape4d, np.nan, out_dtype))

    mod = five2four_compile(shape_5d, dtype, op_attrs, attrs)
    out_buf = np.full(shape4d, np.nan, out_dtype)
    launch_args = [data_5d, out_buf]

    # NOTE(review): the block-dim argument is treated as part of the dynamic
    # path; the source indentation was lost, so confirm against the upstream file.
    if attrs.get("dynamic"):
        launch_args.extend((shape_5d[0], shape_5d[1], shape_5d[4]))
        launch_args.append(compute_blockdim(shape4d))

    out_buf = utils.mod_launch(mod, launch_args, outputs=(1, ), expect=bench_mark)
    rtol, atol = get_rtol_atol("five2four", dtype)
    compare_result = compare_tensor(out_buf, bench_mark, rtol=rtol, atol=atol, equal_nan=True)
    return data_5d, out_buf, bench_mark, compare_result
def reduce_max_run(shape, dtype, axis, keepdims, kernel_name="reduce_max", attrs=None):
    """Build and run reduce_max, or only build it when auto-tuning is requested.

    When ``attrs`` contains the key ``'tuning'`` the kernel name is taken from
    ``attrs`` and nothing is launched: the result is ``mod`` alone if the
    tuning value is falsy, otherwise ``(mod, expect, (inputs, output))``.

    Otherwise returns ``(inputs, output, expect, result)``; the kernel is also
    profiled first when ``attrs["profiling"]`` is truthy.
    """
    attrs = {} if attrs is None else attrs
    reduce_attrs = [axis, keepdims]

    if 'tuning' in attrs:
        tuning = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(reduce_max, [shape], [dtype], op_attrs=reduce_attrs,
                                  kernel_name=kernel_name, attrs=attrs, tuning=tuning)
        if not tuning:
            return mod
        expect, inputs, output = gen_data(axis, dtype, keepdims, shape)
        return mod, expect, (inputs, output)

    mod = utils.op_build_test(reduce_max, [shape], [dtype], op_attrs=reduce_attrs,
                              kernel_name=kernel_name, attrs=attrs)
    expect, inputs, output = gen_data(axis, dtype, keepdims, shape)
    output = utils.mod_launch(mod, (inputs, output), expect=expect)
    rtol, atol = get_rtol_atol("reduce_max", dtype)

    if attrs.get("profiling", False):
        import akg
        target_name = attrs["target"].split()[0]
        nd_args = to_tvm_nd_array([inputs, output], akg.tvm.context(target_name, 0))
        target_profiling(mod, *nd_args, target=target_name, repeat_time=attrs["repeat_times"])

    passed = compare_tensor(output, expect, rtol=rtol, atol=atol, equal_nan=True)
    return inputs, output, expect, passed
def gather_run(shape1, dtype1, shape2, dtype2, axis, poly_sch=True, attrs=None):
    """Build, run and check the gather kernel, optionally profiling it.

    Args:
        shape1, dtype1: shape and dtype of the params input.
        shape2, dtype2: shape and dtype of the indices input.
        axis: passed to the build as the single op attribute.
        poly_sch: forwarded to the build as ``polyhedral``.
        attrs: attribute dict; a falsy value is replaced by
            ``{"target": "cuda"}``. ``"profiling"`` defaults to off and
            ``"repeat_times"`` is read only when profiling.

    Returns:
        Tuple ``((params, indices), output, expect, res)``.

    Raises:
        AssertionError: if the output does not match the expectation.
    """
    if not attrs:
        attrs = {"target": "cuda"}
    mod = utils.op_build_test(gather, [shape1, shape2], [dtype1, dtype2], op_attrs=[axis],
                              polyhedral=poly_sch, attrs=attrs, kernel_name="gather")
    params, indices, expect = gen_data(shape1, dtype1, shape2, dtype2, axis)

    # A 0-d expectation still needs a one-element output buffer.
    output_shape = expect.shape
    if len(expect.shape) == 0:
        output_shape = (1, )
    output = np.zeros(output_shape, expect.dtype)
    output = utils.mod_launch(mod, (params, indices, output), expect=expect)

    # get_rtol_atol yields (rtol, atol), in that order.
    rtol, atol = get_rtol_atol("gather", dtype1)
    res = compare_tensor(output, expect, rtol=rtol, atol=atol)
    print("Test {}".format("Pass" if res else "Failed"))

    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod if target_name == "llvm" else mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    # The default attrs carry no "profiling" key, so read it with a default.
    if attrs.get("profiling", False):
        params, indices, output = to_tvm_nd_array([params, indices, output],
                                                  akg.tvm.context(target_name, 0))
        target_profiling(mod, params, indices, output, target=target_name,
                         repeat_time=attrs["repeat_times"])
    return (params, indices), output, expect, res
def fake_quant_with_min_max_vars_per_channel_gradient_run(
        shape_gradient, shape_input, shape_min, shape_max, dtype,
        num_bits=8, narror_range=False, attrs=None):
    """Build and launch the per-channel fake-quant gradient kernel and check its three outputs.

    Returns:
        Tuple ``([gradient, data, min, max], outputs, expects, all_passed)``.
    """
    op_name = 'fake_quant_with_min_max_vars_per_channel_gradient'
    kernel = utils.op_build_test(
        fake_quant_with_min_max_vars_per_channel_gradient.fake_quant_with_min_max_vars_per_channel_gradient,
        [shape_gradient, shape_input, shape_min, shape_max],
        [dtype, dtype, dtype, dtype],
        [num_bits, narror_range],
        kernel_name=op_name, attrs=attrs)
    launch_args, expects, grad, data, low, high = gen_data(
        shape_gradient, shape_input, shape_min, shape_max, dtype, num_bits, narror_range)
    # The last three launch arguments are the kernel outputs.
    actuals = utils.mod_launch(kernel, launch_args, expect=expects, outputs=(-3, -2, -1))

    rtol, atol = get_rtol_atol("fake_quant_with_min_max_vars_per_channel_gradient", dtype)

    # NOTE(review): these NaN-tolerant comparisons are run but their results
    # are discarded; only the comparison below decides the verdict.
    for idx in range(3):
        compare_tensor(actuals[idx], expects[idx], rtol=rtol, atol=atol, equal_nan=True)

    verdicts = [compare_tensor(got, want, rtol=rtol, atol=atol)
                for got, want in zip(actuals, expects)]
    return [grad, data, low, high], actuals, expects, all(verdicts)
def square_execute(shape, dtype, kernel_name, attrs):
    """Compile and run square, or only compile it when auto-tuning is requested.

    When ``attrs`` contains the key ``'tuning'`` the kernel name is taken from
    ``attrs`` and nothing is launched: the result is ``mod`` alone if the
    tuning value is falsy, otherwise ``(mod, expect, args)``.

    Otherwise returns ``(input, output, expect, result)``.
    """
    if 'tuning' in attrs:
        tuning = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = square_compile(shape, dtype, kernel_name, attrs, tuning=tuning)
        if not tuning:
            return mod
        launch_args, reference, _ = method_name(dtype, shape)
        return mod, reference, launch_args

    mod = square_compile(shape, dtype, kernel_name, attrs)
    launch_args, reference, source = method_name(dtype, shape)
    actual = utils.mod_launch(mod, launch_args, expect=reference)
    rtol, atol = get_rtol_atol("square", dtype)
    passed = compare_tensor(actual, reference, rtol=rtol, atol=atol, equal_nan=True)
    return source, actual, reference, passed
def selu_run(shape, dtype, attrs):
    """Build and launch the selu kernel and compare it with the expected output.

    Returns:
        Tuple ``(input, output, expect, result)``.
    """
    kernel = utils.op_build_test(selu.selu, [shape], [dtype], kernel_name='selu',
                                 op_attrs=[], attrs=attrs)
    launch_args, reference, source = gen_data(dtype, shape)
    actual = utils.mod_launch(kernel, launch_args, expect=reference)

    rtol, atol = get_rtol_atol("selu", dtype)
    passed = compare_tensor(actual, reference, rtol=rtol, atol=atol, equal_nan=True)
    return source, actual, reference, passed
def unpack_run(shape, dtype, tensor_format, num, axis, attrs):
    """Build and launch unpack and compare every output with its expectation.

    Returns:
        Tuple ``(data, outputs, expects, all_passed)``.
    """
    kernel = utils.op_build_test(unpack.unpack, [shape], [dtype],
                                 op_attrs=[tensor_format, num, axis],
                                 kernel_name="unpack", attrs=attrs)
    data, expects, out_bufs = gen_data(shape, dtype, axis)
    # Every output buffer is a trailing launch argument.
    out_places = list(range(-len(out_bufs), 0))
    outputs = utils.mod_launch(kernel, (data, *out_bufs), expect=expects, outputs=out_places)
    rtol, atol = get_rtol_atol("unpack", dtype)
    verdicts = [compare_tensor(got, want, rtol=rtol, atol=atol)
                for got, want in zip(outputs, expects)]
    return data, outputs, expects, all(verdicts)
def bn_2_run(shape, dtype, momentum, eps, kernel_name, attrs):
    """Test run function for the second part of the split bn.

    When ``attrs`` contains the key ``'tuning'`` the kernel name is taken from
    ``attrs`` and nothing is launched: the result is ``mod`` alone if the
    tuning value is falsy, otherwise ``(mod, expects, launch_spec)``.

    Otherwise returns ``(inputs, results, expects, all_passed)``.
    """
    in_shapes, in_dtypes = get_compile_param(shape, dtype, 2)

    def _prepare():
        # Generate the data and work out which launch arguments are outputs.
        tensors, buffers, wanted = gen_data(shape, dtype, momentum, eps, 2)
        places = list(range(-len(buffers), 0))
        # In-place binds: output slot 1 is input 2, output slot 2 is input 3.
        for arg_index, out_slot in ((2, 1), (3, 2)):
            places[out_slot] = arg_index
        return tensors, buffers, wanted, places

    if 'tuning' in attrs:
        tuning = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(fused_bn2, in_shapes, in_dtypes, op_attrs=[momentum],
                                  kernel_name=kernel_name, attrs=attrs, tuning=tuning)
        if not tuning:
            return mod
        inputs, output_buffers, expects, out_places = _prepare()
        launch_spec = {"args": (*inputs, *output_buffers),
                       'outputs': out_places,
                       'tuning': False}
        return mod, expects, launch_spec

    mod_2 = utils.op_build_test(fused_bn2, in_shapes, in_dtypes, op_attrs=[momentum],
                                kernel_name="fusedbn2_" + kernel_name, attrs=attrs)
    inputs, output_buffers, expects, out_places = _prepare()
    res_2 = utils.mod_launch(mod_2, [*inputs, *output_buffers], outputs=out_places, expect=expects)

    rtol, atol = get_rtol_atol("bn_split", dtype)
    verdicts = [compare_tensor(got, want, rtol=rtol, atol=atol)
                for got, want in zip(res_2, expects)]
    return inputs, res_2, expects, all(verdicts)