Example #1
def test_fused_pad(shape,
                   pad_before,
                   pad_after,
                   layout='NHWC',
                   pad_value=0.0,
                   poly_sch=False):
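    """Build the fused pad kernel (auto or manual schedule), check the result
    against the NumPy reference, then profile on GPU."""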
    op_attrs = [pad_before, pad_after, layout, pad_value]
    if poly_sch:
        mod = utils.op_build(fused_pad_auto, [shape], ['float32'],
                             op_attrs=op_attrs,
                             attrs={"target": "cuda"})
    else:
        mod = utils.op_build(fused_pad_manual, [shape], ['float32'],
                             op_attrs=op_attrs)
    data, output, expect = gen_data(shape, pad_before, pad_after, layout,
                                    pad_value)
    args = (data, output)
    output = utils.mod_launch(mod, args, expect=expect)
    res = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    data = to_tvm_nd_array(data)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, data, expect, 400)
Example #2
def test_fused_l2loss_grad(shape, layout, fill_data=4e-05, poly_sch=False):
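    """Build the fused_l2loss_grad kernel (polyhedral schedule only), validate
    it against the NumPy reference, then profile on GPU."""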
    data_1 = gen_data(shape, 'float16')
    data_2 = gen_data(shape, 'float32')

    expect, output = compute_py(data_1, data_2, layout, fill_data)
    input_list = [shape, shape]
    dtype_list = ['float16', 'float32']
    op_attrs = [layout, fill_data]
    if poly_sch:
        mod = utils.op_build_test(fused_l2loss_grad,
                                  input_list,
                                  dtype_list,
                                  kernel_name="fused_l2loss_grad",
                                  op_attrs=op_attrs,
                                  attrs={"target": "cuda"})
    else:
        # only the polyhedral-schedule build is shown in this example
        raise ValueError("poly_sch=False is not supported here")

    args = [data_1, data_2, output]
    output = utils.mod_launch(mod, args, expect=expect)
    res = np.allclose(output, expect, rtol=5e-03, atol=1e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    data = to_tvm_nd_array([data_1, data_2])
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *data, expect, 400)
Example #3
def test_fused_relu_grad(shape, c1=0, poly_sch=False):
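    """Build the fused ReLU-grad kernel (auto or manual schedule), validate
    the result, then profile on GPU."""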
    dtype = 'float16'
    input = gen_data(shape, dtype)
    expect = compute_expect(input, c1)
    shapes = [shape] * 3
    dtypes = [dtype] * 3
    attrs = [c1]
    if poly_sch:
        mod = utils.op_build(fused_relu_grad_auto,
                             shapes,
                             dtypes,
                             op_attrs=attrs,
                             attrs={"target": "cuda"})
    else:
        mod = utils.op_build(fused_relu_grad_manual,
                             shapes,
                             dtypes,
                             op_attrs=attrs)
    output = np.full(shape, np.nan, dtype)
    output = utils.mod_launch(mod, (*input, output), expect=expect)
    res = np.allclose(output, expect, rtol=5e-3, atol=1e-8)
    print("Test {}".format("Pass" if res else "Failed"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    data = to_tvm_nd_array(input)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *data, expect, 400)
Example #4
def test_fused_mul_div_rsqrt_mul_isfinite_red(shape,
                                              dtype='float32',
                                              poly_sch=False):
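    """Build the fused mul-div-rsqrt-mul-isfinite-reduce kernel (auto or
    manual schedule), check all four outputs, then profile on GPU."""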
    input = gen_data(shape, dtype)
    expect = compute_expect(input)
    input_shape = [shape, shape]
    input_dtype = [dtype, dtype]
    if poly_sch:
        mod = utils.op_build(fused_mul_div_rsqrt_mul_isfinite_red_auto,
                             input_shape,
                             input_dtype,
                             attrs={"target": "cuda"})
    else:
        mod = utils.op_build(fused_mul_div_rsqrt_mul_isfinite_red_manual,
                             input_shape, input_dtype)
    outputs = [np.full((1,), False, 'bool')] + [np.full(shape, np.nan, dtype)] * 3
    output = utils.mod_launch(mod, [*input, *outputs],
                              outputs=list(range(-len(outputs), 0)),
                              expect=expect)
    ret = compare_tensor(output[0], expect[0], rtol=5e-03, atol=1.e-08)
    ret &= compare_tensor(output[1], expect[1], rtol=5e-03, atol=1.e-08)
    ret &= compare_tensor(output[2], expect[2], rtol=5e-03, atol=1.e-08)
    ret &= compare_tensor(output[3], expect[3], rtol=5e-03, atol=1.e-08)
    print("Test {}".format("Pass" if ret else "Failed"))
    if not ret:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    data = to_tvm_nd_array(input)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *data, *expect, 400)
Example #5
def test_fused_bn_reduce(in_shape,
                         in_dtype='float16',
                         layout='NHWC',
                         out_dtype='float32',
                         poly_sch=False):
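    """Build the fused_bn_reduce kernel for NHWC or NCHW input (polyhedral
    schedule only), validate the outputs, then profile on GPU."""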

    if layout not in ("NHWC", "NCHW"):
        raise NotImplementedError("Layout {} is not supported".format(layout))

    op_attrs = [layout, out_dtype]
    if poly_sch:
        mod = utils.op_build_test(fused_bn_reduce, [in_shape], [in_dtype],
                                  kernel_name="fused_bn_reduce",
                                  op_attrs=op_attrs,
                                  attrs={"target": "cuda"})

    data, outputs, expect = gen_data(in_shape, in_dtype, layout, out_dtype)
    inputs = [data]
    arglist = inputs + outputs
    output = utils.mod_launch(mod,
                              arglist,
                              outputs=tuple(range(-len(outputs), 0)),
                              expect=expect)

    res = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")
    inputs = to_tvm_nd_array(inputs)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *inputs, *expect, 400)
Example #6
def get_result(desc, poly, attrs=None):
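    """Build a composite kernel from its JSON description, compare each output
    with its expected value, profile CUDA kernels, and return whether all
    outputs match."""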
    if attrs is None:
        attrs = {}
    if poly:
        reduce_lib_key = "enable_akg_reduce_lib"
        if reduce_lib_key not in attrs:
            attrs[reduce_lib_key] = poly
    if attrs == {}:
        mod = composite.build(desc, {'dim': "0 0 9728 9728"}, poly=poly)
    else:
        mod = composite.build(desc, attrs, poly=poly)
    input_for_mod, expect, output_indexes = gen_json_data(desc)
    output = utils.mod_launch(mod, input_for_mod, output_indexes)

    rtol, atol = get_rtol_atol("FUSED", "float32")
    flag = True
    if len(output_indexes) > 1:
        if not all(map(lambda x, y: compare_tensor(x, y, rtol=rtol, atol=atol), output, expect)):
            logging.info(mod.imported_modules[0].get_source())
            flag = False
    else:
        if not compare_tensor(output, expect, rtol=rtol, atol=atol):
            logging.info(mod.imported_modules[0].get_source())
            flag = False
    desc_d = json.loads(desc)
    if desc_d["process"] == "cuda":
        inputs = to_tvm_nd_array(input_for_mod)
        expect = to_tvm_nd_array(expect)
        gpu_profiling(mod, *inputs, *expect, repeat_time=400)
    return flag
Example #7
def test_fused_relu_grad_bn_update_grad(shape, out_shape, dtype="float16", layout="NHWC", out_dtype="float32", poly_sch=False):
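    """Build the fused_relu_grad_bn_update_grad kernel (auto or manual
    schedule), validate both outputs, then profile on GPU."""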
    shape_list = [out_shape, shape, shape, shape]
    dtype_list = [out_dtype, dtype, dtype, dtype]
    op_attrs = [layout]
    if poly_sch:
        mod = utils.op_build(
            fused_relu_grad_bn_update_grad_auto,
            shape_list,
            dtype_list,
            op_attrs=op_attrs,
            attrs={"target": "cuda"})
    else:
        mod = utils.op_build(fused_relu_grad_bn_update_grad_manual, shape_list, dtype_list, op_attrs=op_attrs)
    
    head, data_sum, in_bn, in_active, output, expect = gen_data(shape, out_shape, dtype, out_dtype, layout)
    outputs = [output, output]
    inputs = [data_sum, in_bn, head, in_active]
    arg_list = inputs + outputs
    outputs = utils.mod_launch(mod, arg_list, outputs=tuple(range(-len(outputs), 0)), expect=expect)
    res = np.allclose(outputs, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    inputs = to_tvm_nd_array(inputs)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *inputs, *expect, 400)
Example #8
def get_result(desc, poly, attrs=None):
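    """Build a composite kernel from its JSON description, verify every
    output, and profile on the CUDA backend; returns whether all outputs
    match."""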
    backend = _get_backend(desc)
    if backend == "cuda" and not attrs:
        attrs = _add_attrs_from_json(desc, attrs, poly)
    if attrs is None:
        attrs = {}
    if poly:
        reduce_lib_key = "enable_akg_reduce_lib"
        if reduce_lib_key not in attrs:
            attrs[reduce_lib_key] = poly

    build_attrs = attrs if attrs else None
    mod = composite.build(desc, build_attrs, poly=poly)

    input_for_mod, expect, output_indexes = gen_json_data(desc)
    output = utils.mod_launch(mod, input_for_mod, output_indexes)

    if not all(
            map(_compare_func, output if isinstance(output, (list, tuple)) else
                [output], expect if isinstance(expect,
                                               (list, tuple)) else [expect])):
        logging.info(mod.imported_modules[0].get_source())
        return False
    if backend == "cuda":
        inputs = to_tvm_nd_array(input_for_mod)
        expect = to_tvm_nd_array(expect)
        gpu_profiling(mod, *inputs, *expect, repeat_time=400)
    return True
Example #9
def test_fused_relu_grad_bn_double_reduce_grad(shape, out_shape, dtype="float32", layout="NHWC", out_dtype="float16", poly_sch=False):
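    """Build the fused_relu_grad_bn_double_reduce_grad kernel (polyhedral
    schedule only), validate both outputs, then profile on GPU."""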
    
    shape_list = [shape] * 5 + [out_shape] + [shape] * 3 + [out_shape] + [shape] * 3 + [out_shape] * 3
    dtype_list = [dtype] * 5 + [out_dtype] + [dtype] * 3 + [out_dtype] + [dtype] * 3 + [out_dtype] * 3
    op_attrs = [layout, out_dtype]
    if poly_sch:
        mod = utils.op_build_test(
            fused_relu_grad_bn_double_reduce_grad,
            shape_list,
            dtype_list,
            op_attrs=op_attrs,
            kernel_name="fused_relu_grad_bn_double_reduce_grad",
            attrs={"target": "cuda"})
    else:
        # only the polyhedral-schedule build is shown in this example
        raise ValueError("poly_sch=False is not supported here")

    inshp_data, outshp_data, output, expect = gen_data(shape, out_shape, dtype, out_dtype)
    inputs = [inshp_data] * 5 + [outshp_data] + [inshp_data] * 3 + [outshp_data] + [inshp_data] * 3 + [outshp_data] * 3
    outputs = [output, output]
    arg_list = inputs + outputs
    outputs = utils.mod_launch(mod, arg_list, outputs=tuple(range(-len(outputs), 0)), expect=expect)

    res = np.allclose(outputs, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    inputs = to_tvm_nd_array(inputs)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *inputs, *expect, 400)
Example #10
def test_fused_bn_double_follow_relu(in_shape, in_dtype='float16', layout='NHWC', out_dtype='float16', poly_sch=False):
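    """Build the fused_bn_double_follow_relu kernel (auto or manual schedule),
    validate the output, then profile on GPU."""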

    if layout not in ("NHWC", "NCHW"):
        raise NotImplementedError("Layout {} is not supported".format(layout))

    inter_dtype = 'float32'
    inputs, output, expect = gen_data(in_shape, in_dtype, inter_dtype, layout, out_dtype)
    input_shape_list = [i.shape for i in inputs]
    input_dtype_list = [inter_dtype] * 4 + [in_dtype] + [inter_dtype] * 4 + [in_dtype]
    op_attrs = [layout, out_dtype]
    if poly_sch:
        mod = utils.op_build(fused_bn_double_follow_relu_auto, input_shape_list, input_dtype_list,
                             op_attrs=op_attrs, attrs={"target": "cuda"})
    else:
        mod = utils.op_build(fused_bn_double_follow_relu_manual, input_shape_list, input_dtype_list, op_attrs=op_attrs)

    outputs = [output]
    arglist = inputs + outputs
    output = utils.mod_launch(mod, arglist, expect=expect)
    
    res = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    inputs = to_tvm_nd_array(inputs)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *inputs, expect, 400)
Example #11
def test_fused_relu_grad_bn_reduce_grad(shape_1,
                                        shape_2,
                                        layout='NHWC',
                                        poly_sch=False):
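    """Build the fused_relu_grad_bn_reduce_grad kernel (auto or manual
    schedule), validate the output, then profile on GPU."""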
    data_1 = gen_data(shape_1, 'float32')
    data_2 = gen_data(shape_1, 'float32')
    data_3 = gen_data(shape_1, 'float32')
    data_4 = gen_data(shape_1, 'float32')
    data_5 = gen_data(shape_1, 'float32')
    data_6 = gen_data(shape_1, 'float32')
    data_7 = gen_data(shape_2, 'float16')
    data_8 = gen_data(shape_2, 'float16')
    data_9 = gen_data(shape_2, 'float16')

    expect, output = compute_py(data_1, data_2, data_3, data_4, data_5, data_6,
                                data_7, data_8, data_9, layout)
    input_list = [
        shape_1, shape_1, shape_1, shape_1, shape_1, shape_1, shape_2, shape_2,
        shape_2
    ]
    dtype_list = [
        'float32', 'float32', 'float32', 'float32', 'float32', 'float32',
        'float16', 'float16', 'float16'
    ]
    op_attrs = [layout]
    if poly_sch:
        mod = utils.op_build_test(
            fused_relu_grad_bn_reduce_grad_auto,
            input_list,
            dtype_list,
            kernel_name="fused_relu_grad_bn_reduce_grad_auto",
            op_attrs=op_attrs,
            attrs={"target": "cuda"})
    else:
        mod = utils.op_build_test(
            fused_relu_grad_bn_reduce_grad_manual,
            input_list,
            dtype_list,
            kernel_name="fused_relu_grad_bn_reduce_grad_manual",
            op_attrs=op_attrs)
    args = [
        data_1, data_2, data_3, data_4, data_5, data_6, data_7, data_8, data_9,
        output
    ]
    output = utils.mod_launch(mod, args, expect=expect)
    res = np.allclose(output, expect, rtol=5e-03, atol=1e-08)
    print("Test {}".format("Pass" if res else "Failed"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    inputs = to_tvm_nd_array([
        data_1, data_2, data_3, data_4, data_5, data_6, data_7, data_8, data_9
    ])
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *inputs, expect, 400)
Example #12
def test_fused_relu_grad_bn_double_update_grad(shape_f16,
                                               shape_f32,
                                               layout='NHWC',
                                               poly_sch=False):
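    """Build the fused_relu_grad_bn_double_update_grad kernel (auto or manual
    schedule), validate its three outputs, then profile on GPU."""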
    data_1 = gen_data(shape_f32, 'float32')
    data_2 = gen_data(shape_f16, 'float16')
    data_3 = gen_data(shape_f32, 'float32')
    data_4 = gen_data(shape_f16, 'float16')
    data_5 = gen_data(shape_f16, 'float16')
    data_6 = gen_data(shape_f16, 'float16')
    data_7 = gen_data(shape_f16, 'float16')
    shape_list = [
        shape_f32, shape_f16, shape_f32, shape_f16, shape_f16, shape_f16,
        shape_f16
    ]
    dtype_list = [
        'float32', 'float16', 'float32', 'float16', 'float16', 'float16',
        'float16'
    ]
    data_list = [data_1, data_2, data_3, data_4, data_5, data_6, data_7]
    data_tmp7, data_tmp15, data_tmp22, out_shape = compute_py(
        data_1, data_2, data_3, data_4, data_5, data_6, data_7, layout)
    expect = [data_tmp7, data_tmp15, data_tmp22]
    output = np.full(out_shape, np.nan, 'float32')
    output = [output, output, output]

    if poly_sch:
        mod = utils.op_build(fused_relu_grad_bn_double_update_grad_auto,
                             shape_list,
                             dtype_list,
                             op_attrs=[layout],
                             attrs={"target": "cuda"})
    else:
        mod = utils.op_build(fused_relu_grad_bn_double_update_grad_manual,
                             shape_list,
                             dtype_list,
                             op_attrs=[layout])

    output = utils.mod_launch(
        mod, (data_1, data_2, data_3, data_4, data_5, data_6, data_7, *output),
        outputs=tuple(range(-len(output), 0)),
        expect=expect)

    res = True
    res &= np.allclose(output[0], expect[0], rtol=5e-03, atol=1e-8)
    res &= np.allclose(output[1], expect[1], rtol=5e-03, atol=1e-8)
    res &= np.allclose(output[2], expect[2], rtol=5e-03, atol=1e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    data_list = to_tvm_nd_array(data_list)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *data_list, *expect, 400)
Example #13
def expand_dims_run(shape, axis, dtype, kernel_name="expand_dims", attrs=None):
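    """Build and run the ExpandDims kernel; in tuning mode returns the build
    artifacts, otherwise returns input, output, expected result, and the
    comparison flag."""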
    op_attr = [axis]
    if attrs is None:
        attrs = {}
    if 'tuning' in attrs:
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(ExpandDims, [shape], [dtype],
                                  op_attr,
                                  kernel_name=kernel_name,
                                  attrs=attrs,
                                  tuning=t)
        if t:
            expect, input, output = gen_data(axis, dtype, shape)
            return mod, expect, (input, output)
        else:
            return mod
    else:
        mod = utils.op_build_test(ExpandDims, [shape], [dtype],
                                  op_attr,
                                  kernel_name=kernel_name,
                                  attrs=attrs)
        expect, input, output = gen_data(axis, dtype, shape)
        output = utils.mod_launch(mod, (input, output), expect=expect)
        if attrs.get("profiling", False):
            import akg
            target_name = attrs["target"].split()[0]
            args_list = to_tvm_nd_array([input, output],
                                        akg.tvm.context(target_name, 0))
            target_profiling(mod,
                             *args_list,
                             target=target_name,
                             repeat_time=attrs["repeat_times"])
        return input, output, expect, compare_tensor(output,
                                                     expect,
                                                     rtol=5e-03,
                                                     equal_nan=True)
Example #14
def standard_normal_run(seed, shape, attrs=None):
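    """Build and run the standard_normal kernel; checks the output shape and
    that the sample mean and standard deviation are close to 0 and 1."""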
    if not attrs:
        attrs = {"target": "cuda"}
    mod = utils.op_build_test(standard_normal, [], [],
                              kernel_name="standard_normal",
                              op_attrs=[seed, shape],
                              attrs=attrs)

    output, expect = gen_data(shape)
    output = utils.mod_launch(mod, (output, ), expect=expect)
    res = output.shape == expect.shape
    res &= abs(np.mean(output) - 0) < 1e-1
    res &= abs(np.std(output) - 1) < 1e-1
    print("Test {}".format("Pass" if res else "Fail"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    if attrs["profiling"]:
        output = to_tvm_nd_array(output, akg.tvm.context(target_name, 0))
        target_profiling(mod,
                         output,
                         target=target_name,
                         repeat_time=attrs["repeat_times"])
    return output, output, expect, res
Example #15
def csrmv_run(shape1, dtype1, shape2, dtype2, poly_sch=True, attrs=None):
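    """Build and validate the csr_mv (CSR matrix-vector multiply) kernel;
    returns inputs, output, expected result, and the comparison flag."""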
    if not attrs:
        attrs = {"target": "cuda"}
    if attrs["target"] == "cuda":
        attrs["enable_akg_reduce_lib"] = True
        attrs["enable_atomic_add"] = True
    data, indices, indptr, weight, expect = gen_data(shape1, dtype1, shape2, dtype2)
    attrs["is_csr"] = True

    mod = utils.op_build_test(csr_mv, [data.shape, indices.shape, indptr.shape, weight.shape],
                              ["float32", "int32", "int32", "float32"], polyhedral=poly_sch,
                              attrs=attrs, kernel_name='csrmv')
    
    output_shape = expect.shape
    output = np.zeros(output_shape, dtype="float32")
    output = utils.mod_launch(mod, (data, indices, indptr, weight, output), expect=expect)
    res = compare_tensor(output, expect, rtol=5e-3, atol=1e-8)
    print("Test {}".format("Pass" if res else "Failed"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")
    if attrs["profiling"]:
        args_list = to_tvm_nd_array([data, indices, indptr, weight, output], akg.tvm.context(target_name, 0))
        target_profiling(mod, *args_list, target=target_name,  repeat_time=attrs["repeat_times"])
    return (data, indices, indptr, weight), output, expect, res
Example #16
def select_run(shape_cond, shape_x, dtype_cond, dtype_x, attrs=None):
    """select_run implementation"""
    if attrs is None:
        attrs = {}

    mod = utils.op_build_test(select, [shape_cond, shape_x, shape_x],
                              [dtype_cond, dtype_x, dtype_x],
                              kernel_name='select',
                              op_attrs=[],
                              attrs=attrs)
    args, exp_output, cond, x1, x2 = gen_data(shape_cond, shape_x, dtype_cond,
                                              dtype_x)
    acu_output = utils.mod_launch(mod, args, expect=exp_output)
    if attrs.get("profiling", False):
        import akg
        target_name = attrs["target"].split()[0]
        args_list = to_tvm_nd_array(args, akg.tvm.context(target_name, 0))
        target_profiling(mod,
                         *args_list,
                         target=target_name,
                         repeat_time=attrs["repeat_times"])
    # compare result
    rtol, atol = get_rtol_atol("select", dtype_x)
    testcase_result = compare_tensor(acu_output,
                                     exp_output,
                                     rtol=rtol,
                                     atol=atol,
                                     equal_nan=True)

    return [cond, x1, x2], acu_output, exp_output, testcase_result
Example #17
def fused_is_finite_run(shape, layout='NHWC', poly_sch=True, attrs=None):
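    """Build and validate the fused_is_finite kernel; returns input, output,
    expected result, and the comparison flag."""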
    if not attrs:
        attrs = {"target": "cuda"}
    attrs.update({"enable_akg_reduce_lib": True, "enable_atomic_add": True})
    dtype = "float32"
    mod = utils.op_build_test(fused_is_finite, [shape], [dtype],
                              op_attrs=[layout],
                              kernel_name="fused_is_finite",
                              polyhedral=poly_sch,
                              attrs=attrs)

    data, expect, output = gen_data(shape, dtype, layout)
    args = (data, output)
    output = utils.mod_launch(mod, args, expect=expect)
    res = np.allclose(output, expect, rtol=5e-03, atol=1e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    if attrs["profiling"]:
        data, output = to_tvm_nd_array([data, output],
                                       akg.tvm.context(target_name, 0))
        target_profiling(mod,
                         data,
                         output,
                         target=target_name,
                         repeat_time=attrs["repeat_times"])
    return data, output, expect, res
Example #18
def fused_relu_grad_run(shape, c1=0, poly_sch=True, attrs=None):
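    """Build and validate the fused_relu_grad kernel; returns inputs, output,
    expected result, and the comparison flag."""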
    if not attrs:
        attrs = {"target": "cuda"}
    dtype = 'float16'
    input = gen_data(shape, dtype)
    expect = compute_expect(input, c1)
    shapes = [shape] * 3
    dtypes = [dtype] * 3
    op_attrs = [c1]
    mod = utils.op_build_test(fused_relu_grad, shapes, dtypes, op_attrs=op_attrs,
                              kernel_name="fused_relu_grad", polyhedral=poly_sch, attrs=attrs)

    output = np.full(shape, np.nan, dtype)
    output = utils.mod_launch(mod, (*input, output), expect=expect)
    res = np.allclose(output, expect, rtol=5e-3, atol=1e-8)
    print("Test {}".format("Pass" if res else "Failed"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    if attrs["profiling"]:
        data = to_tvm_nd_array([*input, output], akg.tvm.context(target_name, 0))
        target_profiling(mod, *data, target=target_name, repeat_time=attrs["repeat_times"])
    return input, output, expect, res
Example #19
def fused_mul_div_rsqrt_mul_isfinite_red_run(shape, dtype='float32', poly_sch=True, attrs=None):
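    """Build and validate the fused_mul_div_rsqrt_mul_isfinite_red kernel;
    checks all four outputs."""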
    if not attrs:
        attrs = {"target": "cuda"}
    attrs.update({"enable_akg_reduce_lib": True, "enable_atomic_add": True})
    inputs = gen_data(shape, dtype)
    expect = compute_expect(inputs)
    input_shape = [shape, shape]
    input_dtype = [dtype, dtype]
    mod = utils.op_build_test(fused_mul_div_rsqrt_mul_isfinite_red, input_shape, input_dtype,
                              kernel_name="fused_mul_div_rsqrt_mul_isfinite_red",
                              polyhedral=poly_sch, attrs=attrs)

    outputs = [np.full((1,), False, 'bool')] + [np.full(shape, np.nan, dtype)] * 3
    output = utils.mod_launch(mod, [*inputs, *outputs], outputs=list(range(-len(outputs), 0)), expect=expect)
    ret = compare_tensor(output[0], expect[0], rtol=5e-03, atol=1.e-08)
    ret &= compare_tensor(output[1], expect[1], rtol=5e-03, atol=1.e-08)
    ret &= compare_tensor(output[2], expect[2], rtol=5e-03, atol=1.e-08)
    ret &= compare_tensor(output[3], expect[3], rtol=5e-03, atol=1.e-08)
    print("Test {}".format("Pass" if ret else "Failed"))
    target_name = attrs["target"].split()[0]
    if not ret:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    if attrs["profiling"]:
        data = to_tvm_nd_array([*inputs, *outputs], akg.tvm.context(target_name, 0))
        target_profiling(mod, *data, target=target_name, repeat_time=attrs["repeat_times"])
    return inputs, outputs, expect, ret
Example #20
def fused_bn_update_grad_run(shape, out_shape, dtype="float16", out_dtype="float32", layout="NHWC", poly_sch=True, attrs=None):
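    """Build and validate the fused_bn_update_grad kernel (two outputs);
    returns inputs, outputs, expected result, and the comparison flag."""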
    if not attrs:
        attrs = {"target": "cuda"}
    attrs.update({"enable_akg_reduce_lib": True, "enable_atomic_add": True})
    shape_list = [shape, out_shape, shape]
    dtype_list = [dtype, out_dtype, dtype]
    op_attrs = [layout]
    mod = utils.op_build_test(fused_bn_update_grad, shape_list, dtype_list, op_attrs=op_attrs, kernel_name="fused_bn_update_grad",
                              polyhedral=poly_sch, attrs=attrs)

    head, data_sum, in_bn, output, expect = gen_data(shape, out_shape, dtype, out_dtype, layout)
    outputs = [output, output]
    inputs = [head, data_sum, in_bn]
    arg_list = inputs + outputs
    outputs = utils.mod_launch(mod, arg_list, outputs=tuple(range(-len(outputs), 0)), expect=expect)

    res = np.allclose(outputs, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    if attrs["profiling"]:
        arg_list = to_tvm_nd_array(arg_list, akg.tvm.context(target_name, 0))
        target_profiling(mod, *arg_list, target=target_name, repeat_time=attrs["repeat_times"])
    return inputs, outputs, expect, res
Example #21
def sqrt_run(shape, dtype, attrs):
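    """Build and run the sqrt kernel; in tuning mode returns the build
    artifacts, otherwise returns input, output, expected result, and the
    comparison flag."""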
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(sqrt, [shape], [dtype],
                                  kernel_name=kernel_name,
                                  attrs=attrs,
                                  tuning=t)
        if t:
            expect, input, output = gen_data(dtype, shape)
            return mod, expect, (input, output)
        else:
            return mod
    else:
        expect, input, output = gen_data(dtype, shape)
        mod = utils.op_build_test(sqrt, [shape], [dtype],
                                  kernel_name='sqrt',
                                  attrs=attrs)
        output = utils.mod_launch(mod, (input, output), expect=expect)
        if attrs.get("profiling", False):
            target_name = attrs["target"].split()[0]
            args_list = to_tvm_nd_array([input, output],
                                        akg.tvm.context(target_name, 0))
            target_profiling(mod,
                             *args_list,
                             target=target_name,
                             repeat_time=attrs["repeat_times"])
        return input, output, expect, compare_tensor(output,
                                                     expect,
                                                     rtol=5e-03,
                                                     equal_nan=True)
Example #22
def tensor_scatter_add_run(data_shape,
                           data_type,
                           indices_shape,
                           indices_type,
                           axis,
                           poly_sch=True,
                           attrs=None):
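    """Build and validate the tensor_scatter_add kernel; the updates shape is
    derived from the data and indices shapes."""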
    op_attrs = [axis]
    default_attrs = {"target": "cuda"}
    if attrs:
        default_attrs.update(attrs)
    if len(indices_shape) > 1:
        updates_shape = indices_shape[:-1] + data_shape[indices_shape[-1]:]
    else:
        updates_shape = indices_shape + data_shape[1:]

    mod = utils.op_build_test(tensor_scatter_add,
                              [data_shape, indices_shape, updates_shape],
                              [data_type, indices_type, data_type],
                              attrs=default_attrs,
                              kernel_name="tensor_scatter_add",
                              polyhedral=poly_sch)

    # gen data
    indices_shape = indices_shape + (1, ) if len(
        indices_shape) == 1 else indices_shape
    params, indices, updates, expect = gen_data(data_shape, data_type,
                                                indices_shape, indices_type)
    output_shape = expect.shape

    if len(expect.shape) == 0:
        output_shape = (1, )
    output = np.zeros(output_shape, expect.dtype)
    output = utils.mod_launch(mod, (params, indices, updates, output),
                              expect=expect)

    atol, rtol = get_rtol_atol("tensor_scatter_add", data_type)
    res = compare_tensor(output, expect, rtol=rtol, atol=atol)
    print("Test {}".format("Pass" if res else "Failed"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    if attrs["profiling"]:
        params, indices, updates, output = to_tvm_nd_array(
            [params, indices, updates, output],
            akg.tvm.context(target_name, 0))
        target_profiling(mod,
                         params,
                         indices,
                         updates,
                         output,
                         target=target_name,
                         repeat_time=attrs["repeat_times"])
    return (params, indices, updates), output, expect, res
Example #23
def reciprocal_run(shape, dtype, attrs):
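    """Build and run the reciprocal kernel; in tuning mode returns the build
    artifacts, otherwise returns input, output, expected result, and the
    comparison flag."""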
    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = reciprocal_compile(shape,
                                 dtype,
                                 attrs,
                                 kernel_name=kernel_name,
                                 tuning=t)
        if t:
            expect, input1, output = gen_data(dtype, shape)
            return mod, expect, (input1, output)
        else:
            return mod
    else:
        mod = reciprocal_compile(shape, dtype, attrs)
        expect, input1, output = gen_data(dtype, shape)
        output = utils.mod_launch(mod, (input1, output), expect=expect)
        if attrs["profiling"]:
            target_name = attrs["target"].split()[0]
            args_list = to_tvm_nd_array([input1, output],
                                        akg.tvm.context(target_name, 0))
            target_profiling(mod,
                             *args_list,
                             target=target_name,
                             repeat_time=attrs["repeat_times"])
        rtol, atol = get_rtol_atol("reciprocal", dtype)
        return (input1, ), output, expect, compare_tensor(output,
                                                          expect,
                                                          rtol=rtol,
                                                          atol=atol,
                                                          equal_nan=True)
Example #24
def test_ms_reduce_max(in_shape,
                       in_dtype,
                       axis=None,
                       keepdims=False,
                       poly_sch=False):
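    """Build the reduce_max kernel (polyhedral schedule only), validate the
    output, then profile on GPU."""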
    if poly_sch:
        mod = utils.op_build_test(reduce_max, (in_shape, ), (in_dtype, ),
                                  op_attrs=[axis, keepdims],
                                  kernel_name="reduce_max",
                                  attrs={
                                      "target": "cuda",
                                      "enable_akg_reduce_lib": True,
                                      "enable_atomic_add": True
                                  })
    else:
        # only the polyhedral-schedule build is shown in this example
        raise ValueError("poly_sch=False is not supported here")

    data, output, expect = gen_data(in_shape, in_dtype, axis, keepdims)
    args = (data, output)
    output = utils.mod_launch(mod, args, expect=expect)
    res = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")
    data, expect = to_tvm_nd_array([data, expect])
    gpu_profiling(mod, data, expect, 400)
Example #25
def batch_matmul_run(shape1,
                     shape2,
                     dtype,
                     out_dtype="float32",
                     layout1="NHDT",
                     layout2="NHDT",
                     layout_out="NHDT",
                     shape_bias=None,
                     add_bias=False,
                     tensor_core=True,
                     poly_sch=True,
                     attrs=None):
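    """Build and validate the BatchMatMul kernel on CUDA (optionally with
    tensor cores) or LLVM targets."""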
    op_attrs = [out_dtype, layout1, layout2, layout_out, tensor_core, add_bias]

    default_attrs = attrs
    if not attrs:
        default_attrs = {"target": "cuda"}

    if default_attrs["target"] == "cuda" and tensor_core:
        default_attrs.update({
            "pragma_enable_matmul": True,
            "enable_auto_inline": False
        })
    elif default_attrs["target"] == "llvm":
        if "pragma_enable_matmul" not in default_attrs.keys():
            default_attrs["pragma_enable_matmul"] = True
        if "feature" not in default_attrs.keys():
            default_attrs["feature"] = "avx"

    mod = utils.op_build_test(BatchMatMul, (shape1, shape2, shape_bias),
                              (dtype, dtype, out_dtype),
                              op_attrs=op_attrs,
                              attrs=default_attrs,
                              polyhedral=poly_sch,
                              kernel_name="batch_matmul")

    lhs, rhs, bias, output, expect = gen_data(shape1, shape2, dtype, out_dtype,
                                              layout1, layout2, layout_out,
                                              shape_bias, add_bias)
    args = (lhs, rhs, bias, output)
    output = utils.mod_launch(mod, args, expect=expect)
    res = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    target_name = default_attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    if attrs["profiling"]:
        args = to_tvm_nd_array(args, akg.tvm.context(target_name, 0))
        target_profiling(mod,
                         *args,
                         target=target_name,
                         repeat_time=attrs["repeat_times"])
    return (lhs, rhs, bias), output, expect, res
Example #26
def fused_gather_gather_add_mul_max_exp_scatter_add_run(
        input1_shape,
        input2_shape,
        input3_shape,
        input4_shape,
        data_dtype,
        indices_type,
        axis,
        poly_sch=True,
        attrs=None):
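    """Build and validate the fused gather-gather-add-mul-max-exp-scatter_add
    kernel; checks both outputs."""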
    op_attrs = [axis]
    default_attrs = {"target": "cuda"}
    if attrs:
        default_attrs.update(attrs)
    mod = utils.op_build_test(
        fused_gather_gather_add_mul_max_exp_scatter_add,
        [input1_shape, input2_shape, input3_shape, input4_shape],
        [data_dtype, indices_type, data_dtype, indices_type],
        op_attrs=op_attrs,
        attrs=default_attrs,
        polyhedral=poly_sch,
        kernel_name="fused_gather_gather_add_mul_max_exp_scatter_add",
    )

    # gen data
    input1, input2, input3, input4, expect1, expect2 = gen_data(
        input1_shape, input2_shape, input3_shape, input4_shape, data_dtype,
        indices_type, axis)

    output1 = np.zeros(expect1.shape, expect1.dtype)
    output2 = deepcopy(input1)
    output1, output2 = utils.mod_launch(
        mod, (input1, input2, input3, input4, output1, output2),
        outputs=(-2, -1))

    atol, rtol = get_rtol_atol(
        "fused_gather_gather_add_mul_max_exp_scatter_add", data_dtype)
    res = compare_tensor(output1, expect1, rtol=rtol, atol=atol)
    res &= compare_tensor(output2, expect2, rtol=rtol, atol=atol)
    print("Test {}".format("Pass" if res else "Failed"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    if attrs["profiling"]:
        inputs = to_tvm_nd_array(
            [input1, input2, input3, input4, output1, output2],
            akg.tvm.context(target_name, 0))
        target_profiling(mod,
                         *inputs,
                         target=target_name,
                         repeat_time=attrs["repeat_times"])
    return (input1, input2, input3, input4), (output1, output2), (expect1,
                                                                  expect2), res
Example #27
def conv_fusion_run(shape_data,
                    shape_filter1,
                    shape_filter2,
                    stride1,
                    stride2,
                    padding1,
                    padding2,
                    dilation1,
                    dilation2,
                    dtype,
                    out_dtype="float32",
                    poly_sch=True,
                    attrs=None):
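    """Build and validate a two-convolution ConvFusion kernel with fixed
    tiling attributes."""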
    if not attrs:
        attrs = {"target": "cuda"}
    op_attrs = [stride1, stride2, padding1, padding2, dilation1, dilation2]
    attrs.update({
        "enable_auto_fuse": False,
        "shared_memory_tensors": "out input_1 input_2 input_3",
        "pragma_disable_loop_fusion": True,
        "dim": "3 0 1 1 3 1 1 1 3 2 4 4 3 3 52 52 3 4 64 64"
    })

    mod = utils.op_build_test(ConvFusion,
                              (shape_data, shape_filter1, shape_filter2),
                              (dtype, dtype, dtype),
                              op_attrs=op_attrs,
                              attrs=attrs,
                              polyhedral=poly_sch,
                              kernel_name="conv_fusion_auto")

    data, weight1, weight2, output, expect = fusion_gen_data(
        shape_data, shape_filter1, shape_filter2, stride1, stride2, padding1,
        padding2, dilation1, dilation2, dtype, out_dtype)
    args = (data, weight1, weight2, output)
    output = utils.mod_launch(mod, args, expect=expect)
    res = np.allclose(output, expect, rtol=5e-3, atol=1.e-8)
    print("Test {}".format("Pass"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")

    if attrs["profiling"]:
        data, weight1, weight2, output = to_tvm_nd_array(
            [data, weight1, weight2, output], akg.tvm.context(target_name, 0))
        target_profiling(mod,
                         data,
                         weight1,
                         weight2,
                         output,
                         target=target_name,
                         repeat_time=attrs["repeat_times"])
    return (data, weight1, weight2), output, expect, res
Example #28
def cast_run(shape, srcType, dstType, attrs=None):
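    """Build and run the Cast kernel; supports dynamic shapes and tuning mode,
    returning input, output, expected result, and the comparison flag."""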
    op_attrs = [dstType]
    if attrs is None:
        attrs = {}
    if attrs.get("dynamic"):
        attrs["enable_double_buffer"] = False
        var_shape = []
        for i in range(len(shape)):
            var_shape.append(tvm.var("I" + str(i)))
        build_shape = var_shape
    else:
        build_shape = shape

    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        mod = utils.op_build_test(Cast, [build_shape], [srcType],
                                  op_attrs,
                                  kernel_name=kernel_name,
                                  attrs=attrs,
                                  tuning=t)
        if t:
            args, exp_output, input = gen_data(dstType, shape, srcType)
            return mod, exp_output, args
        else:
            return mod
    else:
        mod = utils.op_build_test(Cast, [build_shape], [srcType],
                                  op_attrs,
                                  kernel_name='cast',
                                  attrs=attrs)
        args, exp_output, input = gen_data(dstType, shape, srcType)
        if attrs.get("dynamic"):
            for i in range(len(shape)):
                args.append(shape[i])
            block_dim = compute_blockdim(shape)
            args.append(block_dim)
        acu_output = utils.mod_launch(mod,
                                      args,
                                      outputs=(1, ),
                                      expect=exp_output)
        # compare result
        rtol, atol = get_rtol_atol("cast", dstType)
        TestCase_Result = compare_tensor(acu_output,
                                         exp_output,
                                         rtol=rtol,
                                         atol=atol,
                                         equal_nan=True)

        if attrs.get("profiling", False):
            target_name = attrs["target"].split()[0]
            args_list = to_tvm_nd_array(args, akg.tvm.context(target_name, 0))
            target_profiling(mod,
                             *args_list,
                             target=target_name,
                             repeat_time=attrs["repeat_times"])
        return input, acu_output, exp_output, TestCase_Result
Example #29
def csr_reduce_sum_run(shape,
                       dtype1,
                       dtype2,
                       axis,
                       nnz=-1,
                       poly_sch=True,
                       attrs=None):
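    """Build and validate the csr_reduce_sum kernel over CSR-format input."""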
    if not attrs:
        attrs = {"target": "cuda"}
    if attrs["target"] == "cuda":
        attrs["enable_akg_reduce_lib"] = True
        attrs["enable_atomic_add"] = True
    op_attrs = [axis, shape]

    # gen data
    data, col_idx, row_idx, expect = gen_data(shape,
                                              dtype1,
                                              dtype2,
                                              axis,
                                              nnz=nnz)
    output_shape = expect.shape
    attrs["is_csr"] = True

    mod = utils.op_build_test(csr_reduce_sum,
                              [data.shape, col_idx.shape, row_idx.shape],
                              [dtype1, dtype2, dtype2],
                              op_attrs=op_attrs,
                              polyhedral=poly_sch,
                              attrs=attrs,
                              kernel_name="csr_reduce_sum")

    if len(expect.shape) == 0:
        output_shape = (1, )
    output = np.zeros(output_shape, expect.dtype)
    output = utils.mod_launch(mod, (data, col_idx, row_idx, output),
                              expect=expect)
    atol, rtol = get_rtol_atol("csr_reduce_sum", dtype1)
    res = compare_tensor(output, expect, rtol=rtol, atol=atol)
    print("Test {}".format("Pass" if res else "Failed"))
    target_name = attrs["target"].split()[0]
    if not res:
        mod_source = mod
        if target_name != "llvm":
            mod_source = mod.imported_modules[0]
        print("Error {}:========================".format(target_name))
        print(mod_source.get_source())
        raise AssertionError("Test fail")
    if attrs["profiling"]:
        args_list = to_tvm_nd_array([data, col_idx, row_idx, output],
                                    akg.tvm.context(target_name, 0))
        target_profiling(mod,
                         *args_list,
                         target=target_name,
                         repeat_time=attrs["repeat_times"])
    return (data, col_idx, row_idx), output, expect, res
Example #30
def test_ms_addn(shape, dtype, n, poly_sch=False):
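    """Build the addn kernel (auto or manual schedule), validate the output,
    then profile on GPU."""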
    shapes = []
    for i in range(n):
        shapes.append(shape)
    if poly_sch:
        mod = utils.op_build_test(addn_auto, [shapes], [dtype], attrs={"target": "cuda"}, kernel_name="addn_auto")
    else:
        mod = utils.op_build_test(addn_manual, [shapes], [dtype], kernel_name="addn_manual")
    expect, inputs, output = gen_data(shape, shapes, dtype, n)
    output = utils.mod_launch(mod, (*inputs, output), expect=expect)
    res = compare_tensor(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    inputs = to_tvm_nd_array(inputs)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *inputs, expect, 400)