Exemplo n.º 1
0
def kldiv_loss_run(shape,
                   dtype,
                   reduction='none',
                   kernel_name="kldiv_loss",
                   attrs=None):
    """Build the kldiv_loss kernel and, outside tuning mode, launch and check it.

    Args:
        shape: Shape used for both operator inputs.
        dtype: Data type used for both operator inputs.
        reduction: Value forwarded to the operator as its single op attribute.
        kernel_name: Kernel name used when ``attrs`` has no ``"tuning"`` key.
            In tuning mode it is replaced by ``attrs.get("kernel_name", False)``.
        attrs: Build-attribute dict, or ``None`` for an empty one. A dict
            supplied by the caller is updated in place (``enable_multicore``
            is set when ``product_is_mini()`` is false).

    Returns:
        * ``"tuning"`` key present and truthy:
          ``(mod, expect, (prediction, target, output))``.
        * ``"tuning"`` key present and falsy: ``mod``.
        * otherwise: ``((prediction, target), output, expect, compare_result)``
          where ``compare_result`` comes from ``compare_tensor`` with
          ``rtol=atol=0.005``.
    """
    # The signature advertises attrs=None, but the body indexes and queries
    # attrs unconditionally; normalise it so the default call works.
    if attrs is None:
        attrs = {}

    input_shape = [shape, shape]
    input_dtype = [dtype, dtype]
    op_attrs = [reduction]

    if not product_is_mini():
        attrs['enable_multicore'] = True

    if 'tuning' in attrs:
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(kldiv_loss.kldiv_loss,
                                  input_shape,
                                  input_dtype,
                                  op_attrs,
                                  kernel_name=kernel_name,
                                  attrs=attrs,
                                  tuning=t)
        if not t:
            return mod
        expect, output, prediction, target = gen_data(dtype, reduction, shape)
        return mod, expect, (prediction, target, output)

    mod = utils.op_build_test(kldiv_loss.kldiv_loss,
                              input_shape,
                              input_dtype,
                              op_attrs,
                              kernel_name=kernel_name,
                              attrs=attrs)
    expect, output, prediction, target = gen_data(dtype, reduction, shape)
    output = utils.mod_launch(mod, (prediction, target, output),
                              expect=expect)
    return (prediction, target), output, expect, compare_tensor(output,
                                                                expect,
                                                                rtol=0.005,
                                                                atol=0.005)
Exemplo n.º 2
0
def test_fused_bn_reduce_grad(in_shape,
                              in_dtype="float16",
                              layout='NHWC',
                              out_dtype='float16',
                              poly_sch=False):
    """Build, launch, verify and profile the fused_bn_reduce_grad kernel.

    Args:
        in_shape: Shape forwarded to ``gen_data``.
        in_dtype: Data type of the 4th and 8th kernel inputs.
        layout: ``"NHWC"`` or ``"NCHW"``; forwarded as an op attribute.
        out_dtype: Output data type; forwarded as an op attribute.
        poly_sch: When true the ``_auto`` variant is built with a CUDA target
            attribute; otherwise the ``_manual`` variant is built.

    Raises:
        NotImplementedError: If ``layout`` is neither ``"NHWC"`` nor ``"NCHW"``.
        AssertionError: If the launched output is not close to the expectation.
    """
    if layout not in ("NHWC", "NCHW"):
        raise NotImplementedError('Layout not supported {} '.format(layout))

    inter_dtype = 'float32'
    inputs, output, expect = gen_data(in_shape, in_dtype, inter_dtype, layout,
                                      out_dtype)
    shapes = [tensor.shape for tensor in inputs]
    # Eight inputs: three intermediate-precision tensors followed by one
    # in_dtype tensor, and that pattern repeated once more.
    dtypes = ([inter_dtype] * 3 + [in_dtype]) * 2

    if poly_sch:
        kernel, extra = fused_bn_reduce_grad_auto, {"attrs": {"target": "cuda"}}
    else:
        kernel, extra = fused_bn_reduce_grad_manual, {}
    mod = utils.op_build(kernel,
                         shapes,
                         dtypes,
                         op_attrs=[layout, out_dtype],
                         **extra)

    output = utils.mod_launch(mod, inputs + [output], expect=expect)

    res = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    verdict = "Pass" if res else "Fail"
    print("Test {}".format(verdict))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    tvm_inputs = to_tvm_nd_array(inputs)
    tvm_expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *tvm_inputs, tvm_expect, 400)
Exemplo n.º 3
0
def strided_slice_execute(shape, begin, end, strides, begin_mask, end_mask,
                          ellipsis_mask, new_axis_mask, shrink_axis_mask,
                          dtype, attrs):
    """Compile the strided_slice kernel and, outside tuning mode, run and check it.

    Returns:
        * ``"tuning"`` key in ``attrs`` and truthy: ``(mod, expect, (input, output))``.
        * ``"tuning"`` key in ``attrs`` and falsy: ``mod``.
        * otherwise: ``(input, output, expect, compare_result)`` where the
          tolerances come from ``get_rtol_atol("strided_slice", dtype)``.
    """

    def make_data():
        # Single place for the (alphabetically ordered) gen_data argument list.
        return gen_data(begin, begin_mask, dtype, ellipsis_mask, end, end_mask,
                        new_axis_mask, shape, shrink_axis_mask, strides)

    if 'tuning' not in attrs.keys():
        mod = strided_slice_compile(shape, begin, end, strides, begin_mask,
                                    end_mask, ellipsis_mask, new_axis_mask,
                                    shrink_axis_mask, dtype, attrs)
        expect, data, out_buf = make_data()
        result = utils.mod_launch(mod, (data, out_buf), expect=expect)
        rtol, atol = get_rtol_atol("strided_slice", dtype)
        matched = compare_tensor(result,
                                 expect,
                                 rtol=rtol,
                                 atol=atol,
                                 equal_nan=True)
        return data, result, expect, matched

    tuning = attrs.get("tuning", False)
    tuned_kernel_name = attrs.get("kernel_name", False)
    mod = strided_slice_compile(shape, begin, end, strides, begin_mask,
                                end_mask, ellipsis_mask, new_axis_mask,
                                shrink_axis_mask, dtype, attrs,
                                kernel_name=tuned_kernel_name,
                                tuning=tuning)
    if not tuning:
        return mod
    expect, data, out_buf = make_data()
    return mod, expect, (data, out_buf)
Exemplo n.º 4
0
def gelu_ad_run(shape, dtype, attrs):
    """Build the gelu_ad kernel and, outside tuning mode, launch and check it.

    The ``gelu_ad_custom`` variant is built for ``float16`` when
    ``product_is_mini()`` is false; ``gelu_ad`` is built otherwise.

    Returns:
        * ``"tuning"`` key in ``attrs`` and truthy:
          ``(mod, expect, (head_np, input_np, output))``.
        * ``"tuning"`` key in ``attrs`` and falsy: ``mod``.
        * otherwise: ``((input_np, head_np), output, expect, compare_result)``.
    """

    def pick_op():
        if dtype == 'float16' and not product_is_mini():
            return gelu_ad.gelu_ad_custom
        return gelu_ad.gelu_ad

    if 'tuning' not in attrs.keys():
        op = pick_op()
        mod = utils.op_build_test(op, [shape, shape], [dtype, dtype],
                                  kernel_name="gelu_ad",
                                  attrs=attrs)
        input_np, head_np, out_buf, expect = gelu_grad_data(shape, dtype)
        result = utils.mod_launch(mod, (head_np, input_np, out_buf),
                                  expect=expect)
        rtol, atol = get_rtol_atol("gelu_ad", dtype)
        matched = compare_tensor(result, expect, rtol=rtol, atol=atol)
        return (input_np, head_np), result, expect, matched

    tuning = attrs.get("tuning", False)
    tuned_kernel_name = attrs.get("kernel_name", False)
    op = pick_op()
    mod = utils.op_build_test(op, [shape, shape], [dtype, dtype],
                              kernel_name=tuned_kernel_name,
                              attrs=attrs,
                              tuning=tuning)
    if not tuning:
        return mod
    input_np, head_np, out_buf, expect = gelu_grad_data(shape, dtype)
    return mod, expect, (head_np, input_np, out_buf)
Exemplo n.º 5
0
def insn_vec_binary_elemwise_run(shape, dtype, attrs):
    """Build the insn_vec_binary_elemwise kernel; outside tuning mode also run it.

    Returns:
        * ``"tuning"`` key in ``attrs`` and truthy: ``(mod, exp_output, args)``.
        * ``"tuning"`` key in ``attrs`` and falsy: ``mod``.
        * otherwise: ``(inputs, actual_output, exp_output, compare_result)``.
    """
    op = insn_vec_binary_elemwise.insn_vec_binary_elemwise
    shapes = [shape, shape]
    dtypes = [dtype, dtype]

    if 'tuning' not in attrs.keys():
        mod = utils.op_build_test(op, shapes, dtypes,
                                  kernel_name='insn_vec_binary_elemwise',
                                  attrs=attrs)
        args, exp_output, inputs = gen_data(dtype, shape)
        actual = utils.mod_launch(mod, args, expect=exp_output)
        # Only rtol is given here; atol is left to compare_tensor's default.
        passed = compare_tensor(actual, exp_output, rtol=5e-03, equal_nan=True)
        return inputs, actual, exp_output, passed

    tuning = attrs.get("tuning", False)
    tuned_kernel_name = attrs.get("kernel_name", False)
    mod = utils.op_build_test(op, shapes, dtypes,
                              kernel_name=tuned_kernel_name,
                              attrs=attrs,
                              tuning=tuning)
    if not tuning:
        return mod
    args, exp_output, inputs = gen_data(dtype, shape)
    return mod, exp_output, args
Exemplo n.º 6
0
def test_ms_divide(shape, dtype, poly_sch=False):
    """Build, launch, verify and profile the divide kernel.

    Args:
        shape: Shape used for both operands.
        dtype: Data type used for both operands.
        poly_sch: When true ``divide_auto`` is built with a CUDA target
            attribute; otherwise ``divide_manual`` is built.

    Raises:
        AssertionError: If the launched output does not match the expectation
            (the module source is printed first).
    """
    if poly_sch:
        mod = utils.op_build(divide_auto, [shape, shape], [dtype, dtype],
                             attrs={"target": "cuda"})
    else:
        mod = utils.op_build(divide_manual, [shape, shape], [dtype, dtype])
    lhs, rhs, output, expect = gen_data(shape, dtype)
    output = utils.mod_launch(mod, (lhs, rhs, output), expect=expect)
    ret = compare_tensor(output,
                         expect,
                         rtol=5e-03,
                         atol=1.e-8,
                         equal_nan=True)
    print("Test {}".format("Pass" if ret else "Failed"))
    if not ret:
        print("Error cuda:==========================")
        # Was misspelled `get_soure`, which raised AttributeError here and
        # hid the real failure instead of dumping the kernel source.
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")
    lhs, rhs, expect = to_tvm_nd_array([lhs, rhs, expect])
    gpu_profiling(mod, lhs, rhs, expect, 400)
Exemplo n.º 7
0
def approximate_equal_run(x_shape,
                          x_dtype,
                          y_shape,
                          y_dtype,
                          tolerance=None,
                          attrs=None):
    """Build and launch the approximate_equal kernel and compare exactly.

    Args:
        x_shape, y_shape: Shapes of the two inputs.
        x_dtype, y_dtype: Data types of the two inputs.
        tolerance: Forwarded as the single op attribute when truthy; a falsy
            value (including ``None`` and ``0``) builds with no op attributes.
        attrs: Build attributes forwarded to ``utils.op_build_test``.

    Returns:
        ``(inputs, output, benchmark, equal)`` where ``equal`` is
        ``np.array_equal(output, benchmark)``.
    """
    shapes = [x_shape, y_shape]
    op_attrs = [tolerance] if tolerance else None
    mod = utils.op_build_test(approximate_equal,
                              shapes,
                              [x_dtype, y_dtype],
                              op_attrs,
                              kernel_name="approximate_equal",
                              attrs=attrs)
    bench_mark, inputs, out_buf = gen_data(x_dtype, shapes, tolerance)
    result = utils.mod_launch(mod, inputs + [out_buf], expect=bench_mark)
    is_equal = np.array_equal(result, bench_mark)
    return inputs, result, bench_mark, is_equal
Exemplo n.º 8
0
def test_fused_is_finite(shape, layout='NHWC', poly_sch=False):
    """Build, launch, verify and profile the fused_is_finite kernel (float32).

    Args:
        shape: Input shape.
        layout: Forwarded as the single op attribute and to ``gen_data``.
        poly_sch: Must be true; this function defines no build for the
            false case.

    Raises:
        NotImplementedError: If ``poly_sch`` is false.
        AssertionError: If the launched output is not close to the expectation.
    """
    # Only the poly_sch build exists. Without this guard `mod` was never
    # bound and the launch below failed with an UnboundLocalError.
    if not poly_sch:
        raise NotImplementedError(
            "fused_is_finite is only built with poly_sch=True")

    dtype = "float32"
    mod = utils.op_build_test(fused_is_finite, [shape], [dtype],
                              op_attrs=[layout],
                              kernel_name="fused_is_finite",
                              attrs={"target": "cuda"})

    data, expect, output = gen_data(shape, dtype, layout)
    args = (data, output)
    output = utils.mod_launch(mod, args, expect=expect)
    res = np.allclose(output, expect, rtol=5e-03, atol=1e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    data, expect = to_tvm_nd_array([data, expect])
    gpu_profiling(mod, data, expect, 400)
Exemplo n.º 9
0
def test_ms_trans_data(shape, axes, dtype, poly_sch=False):
    """Build, launch, verify and profile the trans_data kernel.

    Args:
        shape: Input shape.
        axes: Forwarded as the single op attribute and to ``gen_data``.
        dtype: Input data type.
        poly_sch: When true ``trans_data_auto`` is built with a CUDA target
            attribute; otherwise ``trans_data_manual`` is built.

    Raises:
        AssertionError: If the launched output does not match the expectation.
    """
    if poly_sch:
        mod = utils.op_build_test(trans_data_auto, [shape], [dtype],
                                  op_attrs=[axes],
                                  kernel_name="trans_data_auto",
                                  attrs={"target": "cuda"})
    else:
        mod = utils.op_build_test(trans_data_manual, [shape], [dtype],
                                  op_attrs=[axes],
                                  kernel_name="trans_data_manual")
    data, output, expect = gen_data(shape, axes, dtype)
    output = utils.mod_launch(mod, (data, output), expect=expect)
    ret = compare_tensor(output,
                         expect,
                         rtol=5e-03,
                         atol=1.e-8,
                         equal_nan=True)
    print("Test {}".format("Pass" if ret else "Failed"))
    # A mismatch used to be printed and then ignored, so this test could
    # never fail. Report it the same way the sibling tests do.
    if not ret:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")
    data, expect = to_tvm_nd_array([data, expect])
    gpu_profiling(mod, data, expect, 400)
Exemplo n.º 10
0
def focalloss_ad_run(shape, p_dtype, t_dtype, gamma, kernel_name, attrs):
    """Build the focalloss_ad kernel and, outside tuning mode, launch and check it.

    A random head tensor with shape ``shape[:2]`` and dtype ``p_dtype`` is
    drawn first and used as the first kernel input.

    Returns:
        * ``"tuning"`` key in ``attrs`` and truthy:
          ``(mod, expect, (head, pred, targ, output))``.
        * ``"tuning"`` key in ``attrs`` and falsy: ``mod``.
        * otherwise: ``([head, pred, targ, gamma], output, expect, compare_result)``.
    """
    head = np.random.rand(*shape[:2]).astype(p_dtype)
    shapes = [head.shape, shape, shape]
    dtypes = [p_dtype, p_dtype, t_dtype]

    if 'tuning' not in attrs.keys():
        mod = utils.op_build_test(focalloss_ad, shapes, dtypes,
                                  op_attrs=[gamma],
                                  kernel_name=kernel_name,
                                  attrs=attrs)
        expect, out_buf, pred, targ = gen_data(gamma, head, p_dtype, shape,
                                               t_dtype)
        result = utils.mod_launch(mod, [head, pred, targ, out_buf],
                                  expect=expect)
        matched = compare_tensor(result, expect, rtol=5e-03, atol=5e-03,
                                 equal_nan=True)
        return [head, pred, targ, gamma], result, expect, matched

    tuning = attrs.get("tuning", False)
    # In tuning mode the kernel name comes from attrs, not from the argument.
    kernel_name = attrs.get("kernel_name", False)
    mod = utils.op_build_test(focalloss_ad, shapes, dtypes,
                              op_attrs=[gamma],
                              kernel_name=kernel_name,
                              attrs=attrs,
                              tuning=tuning)
    if not tuning:
        return mod
    expect, out_buf, pred, targ = gen_data(gamma, head, p_dtype, shape, t_dtype)
    return mod, expect, (head, pred, targ, out_buf)
Exemplo n.º 11
0
def test_ms_addn(shape, dtype, n, poly_sch=False):
    """Build, launch, verify and profile the addn kernel for ``n`` inputs.

    Args:
        shape: Shape shared by every input.
        dtype: Data type of the inputs.
        n: Number of inputs.
        poly_sch: When true ``addn_auto`` is built with a CUDA target
            attribute; otherwise ``addn_manual`` is built.

    Raises:
        AssertionError: If the launched output does not match the expectation.
    """
    shapes = [shape for _ in range(n)]

    if poly_sch:
        kernel = addn_auto
        build_kwargs = {"attrs": {"target": "cuda"}, "kernel_name": "addn_auto"}
    else:
        kernel = addn_manual
        build_kwargs = {"kernel_name": "addn_manual"}
    # The whole shape list is passed as one entry of the input-shape list.
    mod = utils.op_build_test(kernel, [shapes], [dtype], **build_kwargs)

    expect, inputs, out_buf = gen_data(shape, shapes, dtype, n)
    result = utils.mod_launch(mod, (*inputs, out_buf), expect=expect)
    res = compare_tensor(result, expect, rtol=5e-03, atol=1.e-8)
    verdict = "Pass" if res else "Fail"
    print("Test {}".format(verdict))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    tvm_inputs = to_tvm_nd_array(inputs)
    tvm_expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *tvm_inputs, tvm_expect, 400)
Exemplo n.º 12
0
def cholesky_run(shape, dtype, attrs):
    """Build the cholesky kernel and, outside tuning mode, launch and check it.

    Returns:
        * ``"tuning"`` key in ``attrs`` and truthy:
          ``(mod, exp_output, (inputs, output))``.
        * ``"tuning"`` key in ``attrs`` and falsy: ``mod``.
        * otherwise: ``(inputs, actual_output, exp_output, close)`` where
          ``close`` is ``np.allclose(..., rtol=5e-03, equal_nan=True)``.
    """
    if 'tuning' not in attrs.keys():
        mod = utils.op_build(cholesky.cholesky, [shape], [dtype],
                             kernel_name='cholesky',
                             attrs=attrs)
        exp_output, inputs, out_buf = gen_data(dtype, shape)
        actual = utils.mod_launch(mod, (inputs, out_buf), expect=exp_output)
        close = np.allclose(actual, exp_output, rtol=5e-03, equal_nan=True)
        return inputs, actual, exp_output, close

    tuning = attrs.get("tuning", False)
    tuned_kernel_name = attrs.get("kernel_name", False)
    mod = utils.op_build(cholesky.cholesky, [shape], [dtype],
                         kernel_name=tuned_kernel_name,
                         attrs=attrs,
                         tuning=tuning)
    if not tuning:
        return mod
    exp_output, inputs, out_buf = gen_data(dtype, shape)
    return mod, exp_output, (inputs, out_buf)
Exemplo n.º 13
0
def test_ms_select(shape_cond, shape_x, dtype_cond, dtype_x, poly_sch=False):
    """Build, launch and verify the select kernel.

    Args:
        shape_cond: Shape of the condition input.
        shape_x: Shape shared by the two value inputs.
        dtype_cond: Data type of the condition input.
        dtype_x: Data type shared by the two value inputs.
        poly_sch: When true ``select_auto`` is built with a CUDA target
            attribute; otherwise ``select_manual`` is built.

    Raises:
        AssertionError: If the launched output does not match the expectation.
    """
    shapes = [shape_cond, shape_x, shape_x]
    dtypes = [dtype_cond, dtype_x, dtype_x]

    if poly_sch:
        kernel = select_auto
        build_kwargs = {"kernel_name": "select_auto",
                        "attrs": {"target": "cuda"}}
    else:
        kernel = select_manual
        build_kwargs = {"kernel_name": "select_manual"}
    mod = utils.op_build_test(kernel, shapes, dtypes, **build_kwargs)

    expect, cond, x1, x2, out_buf = gen_data(shape_cond, shape_x, dtype_cond,
                                             dtype_x)
    result = utils.mod_launch(mod, (cond, x1, x2, out_buf), expect=expect)
    res = compare_tensor(result, expect, rtol=5e-03, atol=1.e-8)
    verdict = "Pass" if res else "Fail"
    print("Test {}".format(verdict))
    if res:
        return
    print("Error cuda:========================")
    print(mod.imported_modules[0].get_source())
    raise AssertionError("Test fail")
Exemplo n.º 14
0
def test_ms_exp(shape, dtype, poly_sch=False):
    """Build, launch and verify the exp kernel.

    Args:
        shape: Input shape.
        dtype: Input data type.
        poly_sch: Must be true; this function defines no build for the
            false case.

    Returns:
        ``True`` when the launched output matches the expectation.

    Raises:
        NotImplementedError: If ``poly_sch`` is false.
        AssertionError: If the launched output does not match the expectation.
    """
    # Only the poly_sch build exists. Without this guard `mod` was never
    # bound and the launch below failed with an UnboundLocalError.
    if not poly_sch:
        raise NotImplementedError("exp is only built with poly_sch=True")

    mod = utils.op_build_test(exp, [shape], [dtype],
                              attrs={"target": "cuda"},
                              kernel_name="exp")

    data, output, expect = gen_data(shape, dtype)
    output = utils.mod_launch(mod, (data, output), expect=expect)
    ret = compare_tensor(output,
                         expect,
                         rtol=5e-03,
                         atol=1.e-8,
                         equal_nan=True)
    print("Test {}".format("Pass" if ret else "Failed"))
    if not ret:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    return True
Exemplo n.º 15
0
def reverse_run(shape, dtype, axis, attrs=None):
    """Build and launch the reverse kernel, then compare with the expectation.

    Args:
        shape: Input shape.
        dtype: Input data type.
        axis: Forwarded as the single op attribute and to ``gen_data``.
        attrs: Build attributes; ``None`` is replaced by an empty dict.

    Returns:
        ``(x, actual_output, exp_output, compare_result)`` with tolerances
        taken from ``get_rtol_atol("reverse", dtype)``.
    """
    attrs = {} if attrs is None else attrs

    mod = utils.op_build_test(reverse.reverse, [shape], [dtype],
                              kernel_name='reverse',
                              op_attrs=[axis],
                              attrs=attrs)
    args, exp_output, x = gen_data(dtype, shape, axis)
    actual = utils.mod_launch(mod, args, expect=exp_output)

    rtol, atol = get_rtol_atol("reverse", dtype)
    matched = compare_tensor(actual,
                             exp_output,
                             rtol=rtol,
                             atol=atol,
                             equal_nan=True)
    return x, actual, exp_output, matched
Exemplo n.º 16
0
def test_ms_rsqrt(shape1, dtype, poly_sch=False):
    """Build, launch, verify and profile the rsqrt kernel.

    Args:
        shape1: Input shape.
        dtype: Input data type.
        poly_sch: When true ``rsqrt_auto`` is built with a CUDA target
            attribute; otherwise ``rsqrt_manual`` is built.

    Raises:
        AssertionError: If the launched output is not close to the expectation.
    """
    # NOTE(review): both variants are built under the kernel name
    # "rsqrt_auto"; confirm whether the manual build should use its own name.
    build_kwargs = {"kernel_name": "rsqrt_auto"}
    if poly_sch:
        kernel = rsqrt_auto
        build_kwargs["attrs"] = {"target": "cuda"}
    else:
        kernel = rsqrt_manual
    mod = utils.op_build_test(kernel, (shape1, ), (dtype, ), **build_kwargs)

    expect, input1, out_buf = gen_data(dtype, shape1)
    result = utils.mod_launch(mod, (input1, out_buf), expect=expect)
    res = np.allclose(result, expect, rtol=5e-03, atol=1.e-8)
    verdict = "Pass" if res else "Fail"
    print("Test {}".format(verdict))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    input1, expect = to_tvm_nd_array([input1, expect])
    gpu_profiling(mod, input1, expect, 400)
Exemplo n.º 17
0
def less_equal_execute(shapes, dtype, kernel_name, attrs):
    """Compile the less_equal kernel and, outside tuning mode, launch and check it.

    Args:
        shapes: Input shapes, forwarded to the compile step and ``gen_data``.
        dtype: Input data type.
        kernel_name: Kernel name used when ``attrs`` has no ``"tuning"`` key.
            In tuning mode it is replaced by ``attrs.get("kernel_name", False)``.
        attrs: Build-attribute dict.

    Returns:
        * ``"tuning"`` key present and truthy: ``(mod, expect, args)`` where
          ``args`` is the launch argument list ``inputs + [output]``.
        * ``"tuning"`` key present and falsy: ``mod``.
        * otherwise: ``(inputs, output, expect, compare_result)``.
    """
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = less_equal_compile(shapes, dtype, kernel_name, attrs, tuning=t)
        if not t:
            return mod
        expect, inputs, output = gen_data(dtype, shapes)
        # The launch path below appends `output` to `inputs` as one extra
        # argument. The old `inputs + output` added a bare buffer to the list
        # and did not produce that argument list.
        return mod, expect, inputs + [output]

    mod = less_equal_compile(shapes, dtype, kernel_name, attrs)
    expect, inputs, output = gen_data(dtype, shapes)
    output = utils.mod_launch(mod, inputs + [output], expect=expect)
    rtol, atol = get_rtol_atol("less_equal", dtype)
    return inputs, output, expect, compare_tensor(output,
                                                  expect,
                                                  rtol=rtol,
                                                  atol=atol,
                                                  equal_nan=True)
Exemplo n.º 18
0
def get_result(desc, attrs=None):
    """Build a composite op from ``desc``, launch it and report whether it matches.

    Args:
        desc: Description passed to ``gen_json_data`` and ``composite.build``.
        attrs: Optional build attributes; only forwarded when truthy.

    Returns:
        ``True`` if the output (or every output, when there is more than one
        output index) passes ``compare_tensor`` with the tolerances from
        ``get_rtol_atol("FUSED", "float32")``; ``False`` otherwise.
    """
    input_for_mod, expect, output_indexes = gen_json_data(desc)

    mod = composite.build(desc, attrs) if attrs else composite.build(desc)
    output = utils.mod_launch(mod, input_for_mod, output_indexes)

    rtol, atol = get_rtol_atol("FUSED", "float32")
    if len(output_indexes) > 1:
        # Pairwise comparison; stops at the first mismatch.
        return all(
            compare_tensor(actual, wanted, rtol=rtol, atol=atol)
            for actual, wanted in zip(output, expect))
    return bool(compare_tensor(output, expect, rtol=rtol, atol=atol))
Exemplo n.º 19
0
def strided_slice_ad_run(input_shape, begin, end, strides, dtype, attrs_op=None, cce_path="./", attrs=None):
    """Build the strided_slice_ad kernel and, outside tuning mode, launch and check it.

    Args:
        input_shape: Shape of the second kernel input.
        begin, end, strides: Per-axis slice parameters; the first kernel
            input has shape ``(end - begin) // strides`` per axis.
        dtype: Data type of both inputs; also forwarded as an op attribute.
        attrs_op: Optional extra attributes merged into ``attrs``.
        cce_path: Unused; kept for interface compatibility.
        attrs: Optional build-attribute dict. A dict supplied by the caller is
            updated in place with ``attrs_op``, as before; ``None`` gets a
            fresh dict for this call.

    Returns:
        * ``"tuning"`` key present and truthy:
          ``(mod, expect, (H_data, input1, output))``.
        * ``"tuning"`` key present and falsy: ``mod``.
        * otherwise: ``((H_data, input1), output, expect, compare_result)``.
    """
    out_shape = [(e - b) // s for b, e, s in zip(begin, end, strides)]

    # The defaults used to be shared `{}` objects and `attrs.update(...)`
    # mutated the shared one, so keys such as "tuning" leaked into later
    # calls. Start from a fresh dict when none is given.
    if attrs is None:
        attrs = {}
    if attrs_op:
        attrs.update(attrs_op)

    input_shapes = [out_shape, input_shape]
    input_dtypes = [dtype, dtype]
    op_attrs = [begin, end, strides, dtype]

    if 'tuning' in attrs:
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(strided_slice_ad.strided_slice_ad, input_shapes, input_dtypes,
                                  op_attrs, kernel_name, attrs, tuning=t)
        if not t:
            return mod
        H_data, expect, input1, output = gen_data(begin, dtype, end, input_shape, out_shape, strides)
        return mod, expect, (H_data, input1, output)

    mod = utils.op_build_test(strided_slice_ad.strided_slice_ad, input_shapes, input_dtypes,
                              op_attrs, "strided_slice_ad", attrs)
    H_data, expect, input1, output = gen_data(begin, dtype, end, input_shape, out_shape, strides)
    output = utils.mod_launch(mod, (H_data, input1, output), expect=expect)

    return (H_data, input1), output, expect, compare_tensor(output, expect, rtol=0.1, equal_nan=True)
Exemplo n.º 20
0
def square_difference_ad_run(shape1,
                             shape2,
                             dtype,
                             kernel_name,
                             attrs,
                             cce_path="./"):
    """Build the square_difference_ad kernel; outside tuning mode also run it.

    Returns:
        * ``"tuning"`` key in ``attrs`` and truthy:
          ``(mod, expect, (head_np, input1, input2, output))``.
        * ``"tuning"`` key in ``attrs`` and falsy: ``mod``.
        * otherwise:
          ``((head_np, input1, input2), output, expect, compare_result)``.
    """
    op = square_difference_ad.square_difference_ad

    if 'tuning' not in attrs.keys():
        # The launch path generates all data before building the kernel.
        expect, input1, input2, out_shape, support_list = gen_input_data(
            dtype, shape1, shape2)
        expect, head_np, out_buf = gen_data(dtype, expect, out_shape,
                                            support_list)
        mod = utils.op_build_test(op,
                                  input_shapes=[out_shape, shape1, shape2],
                                  input_types=[dtype, dtype, dtype],
                                  kernel_name='square_difference_ad',
                                  attrs=attrs)
        result = utils.mod_launch(mod, (head_np, input1, input2, out_buf),
                                  expect=expect)
        matched = compare_tensor(result, expect, rtol=5e-03, equal_nan=True)
        return (head_np, input1, input2), result, expect, matched

    tuning = attrs.get("tuning", False)
    # In tuning mode the kernel name comes from attrs, not from the argument.
    kernel_name = attrs.get("kernel_name", False)
    expect, input1, input2, out_shape, support_list = gen_input_data(
        dtype, shape1, shape2)
    mod = utils.op_build_test(op,
                              input_shapes=[out_shape, shape1, shape2],
                              input_types=[dtype, dtype, dtype],
                              kernel_name=kernel_name,
                              attrs=attrs,
                              tuning=tuning)
    if not tuning:
        return mod
    expect, head_np, out_buf = gen_data(dtype, expect, out_shape, support_list)
    return mod, expect, (head_np, input1, input2, out_buf)
Exemplo n.º 21
0
def csr_gather_run(shape, dtype1, dtype2, nnz=-1, poly_sch=True, attrs=None):
    """Build, launch and verify the csr_gather kernel; optionally profile it.

    Args:
        shape: Dense input shape; also forwarded as the single op attribute.
        dtype1: Data type of the dense input.
        dtype2: Data type of the column-index and row-index inputs.
        nnz: Forwarded to ``gen_data``.
        poly_sch: Forwarded to the build as ``polyhedral``.
        attrs: Build-attribute dict; a falsy value is replaced by
            ``{"target": "cuda"}``. ``"is_csr"`` is set on it. Optional keys
            read here: ``"profiling"`` (default off) and, when profiling,
            ``"repeat_time"``.

    Returns:
        ``((dense, col_idx, row_idx), output, expect, res)``.

    Raises:
        AssertionError: If the launched output does not match the expectation.
    """
    if not attrs:
        attrs = {"target": "cuda"}
    # gen data
    op_attrs = [shape]
    dense, col_idx, row_idx, expect = gen_data(shape, dtype1, dtype2, nnz=nnz)
    output_shape = expect.shape
    attrs["is_csr"] = True

    mod = utils.op_build_test(csr_gather,
                              [shape, col_idx.shape, row_idx.shape],
                              [dtype1, dtype2, dtype2],
                              op_attrs=op_attrs,
                              polyhedral=poly_sch,
                              attrs=attrs,
                              kernel_name="csr_gather")

    # A 0-d expectation still needs a one-element output buffer.
    if len(expect.shape) == 0:
        output_shape = (1, )
    output = np.zeros(output_shape, expect.dtype)
    output = utils.mod_launch(mod, (dense, col_idx, row_idx, output),
                              expect=expect)
    # Every other caller in this file unpacks get_rtol_atol as (rtol, atol).
    # The previous `atol, rtol = ...` swapped the two tolerances.
    rtol, atol = get_rtol_atol("csr_gather", dtype1)
    res = compare_tensor(output, expect, rtol=rtol, atol=atol)
    print("Test {}".format("Pass" if res else "Failed"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")
    # The default attrs carry no "profiling" key; treat a missing key as
    # "off" instead of raising KeyError after a passing run.
    if attrs.get("profiling", False):
        args_list = to_tvm_nd_array([dense, col_idx, row_idx, output, expect],
                                    akg.tvm.context(target_name, 0))
        target_profiling(mod,
                         *args_list,
                         target=target_name,
                         repeat_time=attrs["repeat_time"])
    return (dense, col_idx, row_idx), output, expect, res
Exemplo n.º 22
0
def test_composite_stitch(ci_path):
    """Build and check every composite description file found in ``ci_path``.

    Each file is built once and then launched up to three times, with
    ``gen_json_data`` called again for every attempt; a file passes as soon
    as one attempt matches within ``rtol=0.001`` / ``atol=0.005``.

    Raises:
        ValueError: If at least one file never produced a matching result.
    """
    rtol = 0.001
    atol = 0.005
    max_run_times = 3
    poly = True

    def outputs_match(actual, wanted, multi_output):
        if multi_output:
            return all(
                compare_tensor(a, w, rtol=rtol, atol=atol)
                for a, w in zip(actual, wanted))
        return compare_tensor(actual, wanted, rtol=rtol, atol=atol)

    all_passed = True
    for fi in os.listdir(ci_path):
        with open(os.path.join(ci_path, fi), 'r') as f:
            print(
                "\033[94m%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%file: \033[0m",
                fi)
            desc = f.read()
        attrs = {"enable_akg_reduce_lib": poly}
        mod = composite.build(desc, attrs, poly=poly)

        case_flag = False
        for _ in range(max_run_times):
            input_for_mod, expect, output_indexes = gen_json_data(desc)
            output = utils.mod_launch(mod, input_for_mod, output_indexes)
            if outputs_match(output, expect, len(output_indexes) > 1):
                case_flag = True
                break

        if case_flag:
            logging.info("\033[92mComposite Json {} pass!\033[0m".format(fi))
        else:
            logging.info("\033[91mComposite Json {} fail!\033[0m".format(fi))
        all_passed &= case_flag

    if not all_passed:
        raise ValueError("Precision Error")
    logging.info("All ops are ok!")
Exemplo n.º 23
0
def test_fused_bn_update_grad(shape,
                              out_shape,
                              dtype="float16",
                              out_dtype="float32",
                              layout="NHWC",
                              poly_sch=False):
    """Build, launch, verify and profile the fused_bn_update_grad kernel.

    Args:
        shape: Shape of the first and third inputs.
        out_shape: Shape of the second input.
        dtype: Data type of the first and third inputs.
        out_dtype: Data type of the second input.
        layout: Forwarded as the single op attribute and to ``gen_data``.
        poly_sch: Must be true; this function defines no build for the
            false case.

    Raises:
        NotImplementedError: If ``poly_sch`` is false.
        AssertionError: If the launched outputs are not close to the
            expectation.
    """
    # Only the poly_sch build exists. Without this guard `mod` was never
    # bound and the launch below failed with an UnboundLocalError.
    if not poly_sch:
        raise NotImplementedError(
            "fused_bn_update_grad is only built with poly_sch=True")

    shape_list = [shape, out_shape, shape]
    dtype_list = [dtype, out_dtype, dtype]
    op_attrs = [layout]
    mod = utils.op_build_test(fused_bn_update_grad,
                              shape_list,
                              dtype_list,
                              op_attrs=op_attrs,
                              kernel_name="fused_bn_update_grad",
                              attrs={
                                  "target": "cuda",
                                  "enable_akg_reduce_lib": True,
                                  "enable_atomic_add": True
                              })

    head, data_sum, in_bn, output, expect = gen_data(shape, out_shape, dtype,
                                                     out_dtype, layout)
    outputs = [output, output]
    inputs = [head, data_sum, in_bn]
    arg_list = inputs + outputs
    # The last len(outputs) launch arguments are selected as outputs.
    outputs = utils.mod_launch(mod,
                               arg_list,
                               outputs=tuple(range(-len(outputs), 0)),
                               expect=expect)

    res = np.allclose(outputs, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    inputs = to_tvm_nd_array(inputs)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *inputs, *expect, 400)
Exemplo n.º 24
0
def abs_run(shape, dtype, attrs):
    """Build the abs kernel and, outside tuning mode, launch and check it.

    Returns:
        * ``"tuning"`` key in ``attrs`` and truthy:
          ``(mod, exp_output, (inputs, output))``.
        * ``"tuning"`` key in ``attrs`` and falsy: ``mod``.
        * otherwise: ``(inputs, actual_output, exp_output, compare_result)``
          with tolerances from ``get_rtol_atol("abs", dtype)``.
    """
    shapes = [shape]
    dtypes = [dtype]

    if 'tuning' not in attrs.keys():
        mod = utils.op_build_test(abs.abs_value,
                                  shapes,
                                  dtypes,
                                  kernel_name='abs',
                                  attrs=attrs)
        exp_output, inputs, out_buf = gen_date(dtype, shape)
        actual = utils.mod_launch(mod, (inputs, out_buf), expect=exp_output)

        rtol, atol = get_rtol_atol("abs", dtype)
        passed = compare_tensor(actual,
                                exp_output,
                                rtol=rtol,
                                atol=atol,
                                equal_nan=True)
        return inputs, actual, exp_output, passed

    tuning = attrs.get("tuning", False)
    tuned_kernel_name = attrs.get("kernel_name", False)
    mod = utils.op_build_test(abs.abs_value,
                              shapes,
                              dtypes,
                              kernel_name=tuned_kernel_name,
                              attrs=attrs,
                              tuning=tuning)
    if not tuning:
        return mod
    exp_output, inputs, out_buf = gen_date(dtype, shape)
    return mod, exp_output, (inputs, out_buf)
Exemplo n.º 25
0
def matmul4d_ad_run(shape_x, shape_y, bias, adj_x, adj_y, dtype, out_dtype,
                    kernel_name, attrs):
    """Build and run the ``matmul4d_ad`` kernel, then compare with a reference.

    The m/n/k extents obtained from ``extract_dim`` are rounded up to the
    next multiple of 16 before being handed to ``get_converted_shapes``.
    All generated inputs are float16 regardless of ``dtype``.

    Args:
        shape_x, shape_y: Logical shapes of the two matmul operands.
        bias: Bias selector forwarded to ``get_converted_shapes``.
        adj_x, adj_y: Transpose flags forwarded to the shape helpers, the
            reference computation and the operator attributes.
        dtype: Dtype declared for the operand (and bias) inputs; also the
            dtype of the NaN-filled output placeholder.
        out_dtype: Dtype declared for the head input and passed as an
            operator attribute.
        kernel_name: Kernel name forwarded to ``utils.op_build_test``.
        attrs: Build attributes forwarded to ``utils.op_build_test``.

    Returns:
        ``((input_x, input_y, input_head), dX, dX_expected, compare_result)``.
    """

    def _round_up_16(extent):
        # Round up to the next multiple of 16.
        return (extent + 15) // 16 * 16

    batch_tuple, m, k, n = extract_dim(shape_x, shape_y, adj_x, adj_y)
    m, n, k = _round_up_16(m), _round_up_16(n), _round_up_16(k)

    shape_xx, shape_yy, bias_shape, output_shape, _k = get_converted_shapes(
        m, n, k, batch_tuple, adj_x, adj_y, bias)

    # Generation order is kept (x, y, head) so the random stream is consumed
    # in the same sequence.
    input_x = random_gaussian(shape_xx, miu=0.5, sigma=0.01).astype(np.float16)
    input_y = random_gaussian(shape_yy, miu=0.5, sigma=0.01).astype(np.float16)
    input_head = random_gaussian(output_shape, miu=0.5,
                                 sigma=0.01).astype(np.float16)

    dX_expected = compute_expected(input_y, input_head, adj_x, adj_y, shape_xx)

    if bias_shape is None:
        # Without a bias tensor the bias slot moves into the operator
        # attributes as an explicit ``None``.
        input_shapes = [output_shape, shape_xx, shape_yy]
        input_types = [out_dtype, dtype, dtype]
        op_attrs = [None, out_dtype, adj_x, adj_y]
    else:
        input_shapes = [output_shape, shape_xx, shape_yy, bias_shape]
        input_types = [out_dtype, dtype, dtype, dtype]
        op_attrs = [out_dtype, adj_x, adj_y]

    mod = utils.op_build_test(matmul4d_ad.matmul4d_ad, input_shapes,
                              input_types, op_attrs, kernel_name, attrs)

    # NaN-filled placeholder for the kernel's output argument.
    dx_placeholder = np.full(shape_xx, np.nan, dtype)
    dX = utils.mod_launch(mod,
                          (input_head, input_x, input_y, dx_placeholder),
                          expect=dX_expected)

    compare_result = compare_tensor(dX,
                                    dX_expected,
                                    rtol=0.01,
                                    equal_nan=True)
    return (input_x, input_y, input_head), dX, dX_expected, compare_result
Exemplo n.º 26
0
def test_fused_bn_update(shape,
                         dtype="float32",
                         c1=(1 / (256 * 7 * 7)),
                         c2=1.001e-05,
                         c3=1.00007975,
                         c4=0.100000024,
                         poly_sch=False):
    """Build, run, verify and profile the fused BN-update kernel.

    Args:
        shape: Shape passed to ``gen_data`` and used for the three output
            placeholders.
        dtype: Dtype for all inputs and outputs.
        c1, c2, c3, c4: Scalar constants passed both to the reference
            computation and to the operator as attributes.
        poly_sch: When true, build ``fused_bn_update_auto`` with a
            ``{"target": "cuda"}`` attribute; otherwise build
            ``fused_bn_update_manual``.

    Raises:
        AssertionError: If the kernel output does not match the reference
            within ``rtol=5e-03, atol=1e-8``; the generated source is printed
            first.
    """
    # Named ``inputs`` rather than ``input`` to avoid shadowing the builtin.
    inputs = gen_data(shape, dtype)
    expect = compute_expect(inputs, c1, c2, c3, c4)
    op_attrs = [dtype, c1, c2, c3, c4]
    shapes = [inputs[0].shape] * 4
    dtypes = [dtype] * 4
    if poly_sch:
        mod = utils.op_build_test(fused_bn_update_auto,
                                  shapes,
                                  dtypes,
                                  kernel_name="fused_bn_update_auto",
                                  op_attrs=op_attrs,
                                  attrs={"target": "cuda"})
    else:
        mod = utils.op_build_test(fused_bn_update_manual,
                                  shapes,
                                  dtypes,
                                  kernel_name="fused_bn_update_manual",
                                  op_attrs=op_attrs)

    # Allocate three independent buffers. ``[arr] * 3`` would repeat a
    # reference to one array, so any in-place write to one output slot would
    # show up in all three.
    outputs = [np.full(shape, np.nan, dtype) for _ in range(3)]
    launch_args = inputs + outputs
    # Negative indices mark the trailing entries of ``launch_args`` as the
    # kernel outputs.
    output = utils.mod_launch(mod,
                              launch_args,
                              outputs=(range(-len(outputs), 0)),
                              expect=expect)
    res = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Failed"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    # Profiling reuses the inputs and passes the expected results in the
    # output argument positions.
    data = to_tvm_nd_array(inputs)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *data, *expect, 400)
Exemplo n.º 27
0
def minimum_run(shape1, shape2, dtype, attrs_op=None, attrs=None):
    """Build the ``minimum`` kernel and, outside tuning mode, run and check it.

    Args:
        shape1, shape2: Shapes of the two inputs.
        dtype: Dtype used for both inputs.
        attrs_op: Optional extra attributes. If ``attrs`` is also given they
            are merged into it in place (``attrs.update``); if ``attrs`` is
            ``None`` this mapping is used as ``attrs`` directly.
        attrs: Optional build attributes forwarded to
            ``utils.op_build_test``. Recognised keys here: ``"tuning"``,
            ``"kernel_name"``, ``"profiling"``, ``"target"`` and
            ``"repeat_times"``.

    Returns:
        * ``"tuning"`` present and truthy: ``(mod, expect, (lhd, rhd, output))``.
        * ``"tuning"`` present and falsy: ``mod``.
        * otherwise: ``((lhd, rhd), output, expect, compare_result)``.
    """
    if attrs_op is not None:
        if attrs is not None:
            attrs.update(attrs_op)
        else:
            attrs = attrs_op

    # Both attribute arguments default to None, so key lookups go through an
    # empty stand-in instead of dereferencing None. The original ``attrs``
    # object (possibly None) is still what gets forwarded to the builder.
    options = attrs if attrs is not None else {}

    if 'tuning' in options:
        t = options.get("tuning", False)
        kernel_name = options.get("kernel_name", False)
        mod = utils.op_build_test(minimum, [shape1, shape2], [dtype, dtype],
                                  kernel_name=kernel_name,
                                  attrs=attrs,
                                  tuning=t)
        if t:
            expect, lhd, output, rhd = gen_data(dtype, shape1, shape2)
            return mod, expect, (lhd, rhd, output)
        return mod

    mod = utils.op_build_test(minimum, [shape1, shape2], [dtype, dtype],
                              kernel_name='minimum',
                              attrs=attrs)
    expect, lhd, output, rhd = gen_data(dtype, shape1, shape2)
    output = utils.mod_launch(mod, (lhd, rhd, output), expect=expect)
    if options.get("profiling", False):
        # Imported lazily: only the profiling path needs it.
        import akg
        # Only the first whitespace-separated token of the target string is
        # used as the target name.
        target_name = options["target"].split()[0]
        args_list = to_tvm_nd_array([lhd, rhd, output],
                                    akg.tvm.context(target_name, 0))
        target_profiling(mod,
                         *args_list,
                         target=target_name,
                         repeat_time=options["repeat_times"])

    compare_result = compare_tensor(output,
                                    expect,
                                    rtol=5e-03,
                                    equal_nan=True)

    return (lhd, rhd), output, expect, compare_result
Exemplo n.º 28
0
def confusion_matrix_run(actual_shape,
                         actual_dtype,
                         predict_shape,
                         predict_dtype,
                         num_class,
                         kernel_name="confusion_matrix",
                         attrs=None):
    """Build the ``confusion_matrix`` kernel and, outside tuning mode, verify it.

    Args:
        actual_shape, actual_dtype: Shape and dtype of the first input.
        predict_shape, predict_dtype: Shape and dtype of the second input.
        num_class: Passed to the operator as its only attribute and to
            ``gen_data``.
        kernel_name: Kernel name for the regular path; in tuning mode it is
            replaced by ``attrs["kernel_name"]``.
        attrs: Optional build attributes forwarded to
            ``utils.op_build_test``. A ``"tuning"`` key selects tuning mode.

    Returns:
        * ``"tuning"`` present and truthy:
          ``(mod, expect_data, (actual_data, predict_data, output_data))``.
        * ``"tuning"`` present and falsy: ``mod``.
        * otherwise:
          ``((actual_data, predict_data), output_data, expect_data, compare_result)``.
    """
    # ``attrs`` defaults to None; look keys up through an empty stand-in so
    # the default call does not fail, while still forwarding the caller's
    # original object to the builder.
    options = attrs if attrs is not None else {}

    if 'tuning' in options:
        t = options.get("tuning", False)
        kernel_name = options.get("kernel_name", False)
        mod = utils.op_build_test(confusion_matrix.confusion_matrix,
                                  [actual_shape, predict_shape],
                                  [actual_dtype, predict_dtype],
                                  op_attrs=[num_class],
                                  kernel_name=kernel_name,
                                  attrs=attrs,
                                  tuning=t)
        if t:
            actual_data, expect_data, output_data, predict_data = gen_data(
                actual_dtype, actual_shape, num_class, predict_dtype,
                predict_shape)
            return mod, expect_data, (actual_data, predict_data, output_data)
        return mod

    mod = utils.op_build_test(confusion_matrix.confusion_matrix,
                              [actual_shape, predict_shape],
                              [actual_dtype, predict_dtype],
                              op_attrs=[num_class],
                              kernel_name=kernel_name,
                              attrs=attrs)
    actual_data, expect_data, output_data, predict_data = gen_data(
        actual_dtype, actual_shape, num_class, predict_dtype,
        predict_shape)
    output_data = utils.mod_launch(
        mod, (actual_data, predict_data, output_data), expect=expect_data)

    # Pass (actual, expected) in that order, matching the other runners in
    # this file. NOTE(review): assumes compare_tensor measures tolerance
    # relative to its second argument - confirm against its definition.
    compare_result = compare_tensor(output_data, expect_data)

    return (actual_data, predict_data), output_data, expect_data, compare_result
Exemplo n.º 29
0
def fused_batch_norm_grad_run(shape, dtype, eps, data_format, axis, kernel_name, attrs):
    """Build the fused batch-norm gradient kernel and optionally run and check it.

    Args:
        shape: Shape of the ``dy`` and ``data`` inputs.
        dtype: Dtype shared by all five inputs.
        eps: Epsilon passed to the operator and to ``gen_data``.
        data_format: Layout string; ``"NC1HWC0"`` selects fixed reduction
            axes ``(0, 2, 3)`` and a 5-element parameter shape.
        axis: Channel axis for every other layout (may be negative).
        kernel_name: Kernel name for the regular path; in tuning mode it is
            replaced by ``attrs["kernel_name"]``.
        attrs: Build attributes; a ``"tuning"`` key selects tuning mode.

    Returns:
        * ``"tuning"`` present and truthy: ``(mod, expects, launch_spec)``
          where ``launch_spec`` is a dict with ``"args"``, ``"outputs"`` and
          ``"tuning"`` entries.
        * ``"tuning"`` present and falsy: ``mod``.
        * otherwise: ``((dy, data, gamma), outputs, expects, passed)`` with
          ``passed`` true only if every output matches its reference.
    """
    if data_format == "NC1HWC0":
        axes = (0, 2, 3)
        param_shape = [1, shape[1], 1, 1, shape[4]]
    else:
        # Normalise a negative axis so it can be compared with range indices.
        channel_axis = axis if axis >= 0 else len(shape) + axis
        axes = tuple(i for i in range(len(shape)) if i != channel_axis)
        param_shape = [shape[axis]]

    shapes = [shape, shape] + [param_shape] * 3
    dtypes = [dtype] * 5
    op_attrs = [eps, data_format, axis]

    if 'tuning' in attrs:
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(fused_batch_norm_grad, [shapes], dtypes, op_attrs,
                                  kernel_name=kernel_name, attrs=attrs, tuning=t)
        if not t:
            return mod
        data, dy, expects, gamma, mean, outputs, var = gen_data(
            axes, dtype, eps, data_format, param_shape, shape, axis)
        launch_spec = {
            "args": (dy, data, mean, var, gamma, *outputs),
            'outputs': tuple(range(-len(outputs), 0)),
            'tuning': False,
        }
        return mod, expects, launch_spec

    mod = utils.op_build_test(fused_batch_norm_grad, [shapes], dtypes, op_attrs,
                              kernel_name=kernel_name, attrs=attrs)

    data, dy, expects, gamma, mean, outputs, var = gen_data(
        axes, dtype, eps, data_format, param_shape, shape, axis)
    # Negative indices mark the trailing launch arguments as outputs.
    output_slots = tuple(range(-len(outputs), 0))
    outputs = utils.mod_launch(mod, (dy, data, mean, var, gamma, *outputs),
                               outputs=output_slots, expect=expects)
    # With a single expected result the launch result is wrapped in a list so
    # it can be paired with ``expects``.
    if len(expects) == 1:
        outputs = [outputs]
    else:
        outputs = list(outputs)

    rtol, atol = get_rtol_atol("fused_batch_norm_grad", dtype)
    results = [np.allclose(actual, wanted, rtol=rtol, atol=atol)
               for actual, wanted in zip(outputs, expects)]
    print("results", results)
    return (dy, data, gamma), outputs, expects, all(results)
Exemplo n.º 30
0
def test_ms_less_equal(shape1, shape2, in_dtype, poly_sch=False):
    """Build, run, verify and profile the ``less_equal`` kernel.

    Args:
        shape1, shape2: Shapes of the left and right operands.
        in_dtype: Dtype used for both operands.
        poly_sch: When true, build ``less_equal_auto`` with a
            ``{"target": "cuda"}`` attribute; otherwise build
            ``less_equal_manual``.

    Raises:
        AssertionError: If the kernel output fails ``compare_tensor`` against
            the reference; the generated source is printed first.
    """
    if poly_sch:
        op_func = less_equal_auto
        build_kwargs = {"kernel_name": "less_equal_auto",
                        "attrs": {"target": "cuda"}}
    else:
        op_func = less_equal_manual
        build_kwargs = {"kernel_name": "less_equal_manual"}
    mod = utils.op_build_test(op_func, (shape1, shape2),
                              (in_dtype, in_dtype), **build_kwargs)

    lhs, rhs, output, expect = gen_data(shape1, shape2, in_dtype)
    output = utils.mod_launch(mod, (lhs, rhs, output), expect=expect)

    passed = compare_tensor(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if passed else "Fail"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    # Profiling passes the expected result in the output argument position.
    lhs, rhs, expect = to_tvm_nd_array([lhs, rhs, expect])
    gpu_profiling(mod, lhs, rhs, expect, 400)