Python gpu_profiling示例，akg.utils.result_analysis.gpu_profiling Python示例

示例#1

0

显示文件

def test_fused_mul_div_rsqrt_mul_isfinite_red(shape,
                                              dtype='float32',
                                              poly_sch=False):
    """Build, launch, verify and profile fused_mul_div_rsqrt_mul_isfinite_red.

    Args:
        shape: shape shared by both input tensors and by the three
            NaN-initialised output buffers.
        dtype: element type of the inputs and of those three buffers.
        poly_sch: when True the ``_auto`` variant is built with
            ``attrs={"target": "cuda"}``; otherwise the ``_manual`` variant.

    Raises:
        AssertionError: if any of the four outputs does not match the
            expectation (rtol=5e-3, atol=1e-8).
    """
    # Named ``inputs`` rather than ``input`` so the builtin is not shadowed.
    inputs = gen_data(shape, dtype)
    expect = compute_expect(inputs)
    input_shape = [shape, shape]
    input_dtype = [dtype, dtype]
    if poly_sch:
        mod = utils.op_build(fused_mul_div_rsqrt_mul_isfinite_red_auto,
                             input_shape,
                             input_dtype,
                             attrs={"target": "cuda"})
    else:
        mod = utils.op_build(fused_mul_div_rsqrt_mul_isfinite_red_manual,
                             input_shape, input_dtype)
    # One single-element bool buffer followed by three NaN-filled buffers.
    outputs = [np.full(
        (1, ), False, 'bool')] + [np.full(shape, np.nan, dtype)] * 3
    output = utils.mod_launch(mod, [*inputs, *outputs],
                              outputs=list(range(-len(outputs), 0)),
                              expect=expect)
    # Every output is compared (no short-circuit), as in the unrolled form.
    ret = True
    for idx in range(4):
        ret &= compare_tensor(output[idx], expect[idx],
                              rtol=5e-03, atol=1.e-08)
    print("Test {}".format("Pass" if ret else "Failed"))
    if not ret:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    data = to_tvm_nd_array(inputs)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *data, *expect, 400)

示例#2

0

显示文件

def test_fused_bn_double_follow_relu(in_shape, in_dtype='float16', layout='NHWC', out_dtype='float16', poly_sch=False):
    """Build, launch, verify and profile fused_bn_double_follow_relu.

    Args:
        in_shape: shape forwarded to ``gen_data``.
        in_dtype: dtype used for the 5th and 10th inputs.
        layout: must be "NHWC" or "NCHW".
        out_dtype: forwarded to ``gen_data`` and to the op attributes.
        poly_sch: when True the ``_auto`` variant is built with
            ``attrs={"target": "cuda"}``; otherwise the ``_manual`` variant.

    Raises:
        NotImplementedError: for any other layout.
        AssertionError: if the output does not match the expectation.
    """
    if layout not in ("NHWC", "NCHW"):
        raise NotImplementedError(
            'Layout not supported {} '.format(layout))

    inter_dtype = 'float32'
    inputs, out_buf, expect = gen_data(in_shape, in_dtype, inter_dtype, layout, out_dtype)
    shapes = [tensor.shape for tensor in inputs]
    # Two identical groups: four inter_dtype tensors followed by one in_dtype.
    group = [inter_dtype] * 4 + [in_dtype]
    dtypes = group + group
    op_attrs = [layout, out_dtype]
    if not poly_sch:
        mod = utils.op_build(fused_bn_double_follow_relu_manual, shapes, dtypes, op_attrs=op_attrs)
    else:
        mod = utils.op_build(fused_bn_double_follow_relu_auto, shapes, dtypes,
                             op_attrs=op_attrs, attrs={"target": "cuda"})

    result = utils.mod_launch(mod, inputs + [out_buf], expect=expect)

    passed = np.allclose(result, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if passed else "Fail"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    tvm_inputs = to_tvm_nd_array(inputs)
    tvm_expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *tvm_inputs, tvm_expect, 400)

示例#3

0

显示文件

文件： test_fused_pad.py 项目： zhuyawen/akg

def test_fused_pad(shape,
                   pad_before,
                   pad_after,
                   layout='NHWC',
                   pad_value=0.0,
                   poly_sch=False):
    """Build, launch, verify and profile the fused_pad kernel on float32 data.

    Args:
        shape: shape of the single input tensor.
        pad_before, pad_after, layout, pad_value: forwarded both as op
            attributes and to ``gen_data``.
        poly_sch: when True ``fused_pad_auto`` is built with
            ``attrs={"target": "cuda"}``; otherwise ``fused_pad_manual``.

    Raises:
        AssertionError: if the output does not match the expectation.
    """
    op_attrs = [pad_before, pad_after, layout, pad_value]
    if not poly_sch:
        mod = utils.op_build(fused_pad_manual, [shape], ['float32'],
                             op_attrs=op_attrs)
    else:
        mod = utils.op_build(fused_pad_auto, [shape], ['float32'],
                             op_attrs=op_attrs,
                             attrs={"target": "cuda"})
    data, out_buf, expect = gen_data(shape, pad_before, pad_after, layout,
                                     pad_value)
    result = utils.mod_launch(mod, (data, out_buf), expect=expect)
    passed = np.allclose(result, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if passed else "Fail"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    tvm_data = to_tvm_nd_array(data)
    tvm_expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, tvm_data, tvm_expect, 400)

示例#4

0

显示文件

def test_fused_bn_reduce(in_shape,
                         in_dtype='float16',
                         layout='NHWC',
                         out_dtype='float32',
                         poly_sch=False):
    """Build, launch, verify and profile the fused_bn_reduce kernel.

    Args:
        in_shape: shape of the single input tensor.
        in_dtype: dtype of the input tensor.
        layout: must be "NHWC" or "NCHW".
        out_dtype: forwarded as an op attribute and to ``gen_data``.
        poly_sch: must be True; only the build with
            ``attrs={"target": "cuda"}`` is defined in this function.

    Raises:
        NotImplementedError: for an unsupported layout, or when ``poly_sch``
            is False (no module would be built in that case).
        AssertionError: if the outputs do not match the expectation.
    """
    if layout != "NHWC" and layout != "NCHW":
        raise NotImplementedError('Layout not supported {} '.format(layout))

    if not poly_sch:
        # Without this guard ``mod`` is never assigned and the launch below
        # fails with an opaque UnboundLocalError.
        raise NotImplementedError(
            'fused_bn_reduce is only built when poly_sch=True')

    op_attrs = [layout, out_dtype]
    mod = utils.op_build_test(fused_bn_reduce, [in_shape], [in_dtype],
                              kernel_name="fused_bn_reduce",
                              op_attrs=op_attrs,
                              attrs={"target": "cuda"})

    data, outputs, expect = gen_data(in_shape, in_dtype, layout, out_dtype)
    inputs = [data]
    arglist = inputs + outputs
    # Negative indexes select the trailing output buffers of arglist.
    output = utils.mod_launch(mod,
                              arglist,
                              outputs=tuple(range(-len(outputs), 0)),
                              expect=expect)

    res = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")
    inputs = to_tvm_nd_array(inputs)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *inputs, *expect, 400)

示例#5

0

显示文件

文件： test_fused_relu_grad_bn_double_reduce_grad.py 项目： wxyhv/akg

def test_fused_relu_grad_bn_double_reduce_grad(shape, out_shape, dtype="float32", layout="NHWC", out_dtype="float16", poly_sch=False):
    """Build, launch, verify and profile fused_relu_grad_bn_double_reduce_grad.

    Args:
        shape: shape of the ``dtype`` inputs.
        out_shape: shape of the ``out_dtype`` inputs.
        dtype: dtype paired with ``shape``.
        layout: forwarded as an op attribute.
        out_dtype: dtype paired with ``out_shape``; also an op attribute.
        poly_sch: must be True; only the build with
            ``attrs={"target": "cuda"}`` is defined in this function.

    Raises:
        NotImplementedError: when ``poly_sch`` is False (no module would be
            built in that case).
        AssertionError: if the outputs do not match the expectation.
    """
    if not poly_sch:
        # Without this guard ``mod`` is never assigned and the launch below
        # fails with an opaque UnboundLocalError.
        raise NotImplementedError(
            'fused_relu_grad_bn_double_reduce_grad is only built when poly_sch=True')

    # The 16 inputs follow one pattern of (shape, dtype) / (out_shape, out_dtype).
    shape_list = [shape] * 5 + [out_shape] + [shape] * 3 + [out_shape] + [shape] * 3 + [out_shape] * 3
    dtype_list = [dtype] * 5 + [out_dtype] + [dtype] * 3 + [out_dtype] + [dtype] * 3 + [out_dtype] * 3
    op_attrs = [layout, out_dtype]
    mod = utils.op_build_test(
        fused_relu_grad_bn_double_reduce_grad,
        shape_list,
        dtype_list,
        op_attrs=op_attrs,
        kernel_name="fused_relu_grad_bn_double_reduce_grad",
        attrs={
            "target": "cuda"})

    inshp_data, outshp_data, output, expect = gen_data(shape, out_shape, dtype, out_dtype)
    inputs = [inshp_data] * 5 + [outshp_data] + [inshp_data] * 3 + [outshp_data] + [inshp_data] * 3 + [outshp_data] * 3
    outputs = [output, output]
    arg_list = inputs + outputs
    outputs = utils.mod_launch(mod, arg_list, outputs=tuple(range(-len(outputs), 0)), expect=expect)

    res = np.allclose(outputs, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    inputs = to_tvm_nd_array(inputs)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *inputs, *expect, 400)

示例#6

0

显示文件

文件： test_fused_relu_grad_bn_update_grad.py 项目： zhuyawen/akg

def test_fused_relu_grad_bn_update_grad(shape, out_shape, dtype="float16", layout="NHWC", out_dtype="float32", poly_sch=False):
    """Build, launch, verify and profile fused_relu_grad_bn_update_grad.

    Args:
        shape: shape of the three ``dtype`` inputs.
        out_shape: shape of the single ``out_dtype`` input.
        dtype: dtype paired with ``shape``.
        layout: forwarded as the only op attribute and to ``gen_data``.
        out_dtype: dtype paired with ``out_shape``.
        poly_sch: when True the ``_auto`` variant is built with
            ``attrs={"target": "cuda"}``; otherwise the ``_manual`` variant.

    Raises:
        AssertionError: if the outputs do not match the expectation.
    """
    shapes = [out_shape] + [shape] * 3
    dtypes = [out_dtype] + [dtype] * 3
    op_attrs = [layout]
    if not poly_sch:
        mod = utils.op_build(fused_relu_grad_bn_update_grad_manual, shapes, dtypes, op_attrs=op_attrs)
    else:
        mod = utils.op_build(
            fused_relu_grad_bn_update_grad_auto,
            shapes,
            dtypes,
            op_attrs=op_attrs,
            attrs={
                "target": "cuda"})

    head, data_sum, in_bn, in_active, out_buf, expect = gen_data(shape, out_shape, dtype, out_dtype, layout)
    out_bufs = [out_buf, out_buf]
    inputs = [data_sum, in_bn, head, in_active]
    # Negative indexes select the trailing output buffers of the argument list.
    out_indexes = tuple(range(-len(out_bufs), 0))
    results = utils.mod_launch(mod, inputs + out_bufs, outputs=out_indexes, expect=expect)
    passed = np.allclose(results, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if passed else "Fail"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    tvm_inputs = to_tvm_nd_array(inputs)
    tvm_expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *tvm_inputs, *tvm_expect, 400)

示例#7

0

显示文件

文件： test_ms_reduce_max.py 项目： wxyhv/akg

def test_ms_reduce_max(in_shape,
                       in_dtype,
                       axis=None,
                       keepdims=False,
                       poly_sch=False):
    """Build, launch, verify and profile the reduce_max kernel.

    Args:
        in_shape: shape of the input tensor.
        in_dtype: dtype of the input tensor.
        axis, keepdims: forwarded as op attributes and to ``gen_data``.
        poly_sch: must be True; only the build with the cuda target
            attributes is defined in this function.

    Raises:
        NotImplementedError: when ``poly_sch`` is False (no module would be
            built in that case).
        AssertionError: if the output does not match the expectation.
    """
    if not poly_sch:
        # Without this guard ``mod`` is never assigned and the launch below
        # fails with an opaque UnboundLocalError.
        raise NotImplementedError(
            'reduce_max is only built when poly_sch=True')

    mod = utils.op_build_test(reduce_max, (in_shape, ), (in_dtype, ),
                              op_attrs=[axis, keepdims],
                              kernel_name="reduce_max",
                              attrs={
                                  "target": "cuda",
                                  "enable_akg_reduce_lib": True,
                                  "enable_atomic_add": True
                              })

    data, output, expect = gen_data(in_shape, in_dtype, axis, keepdims)
    args = (data, output)
    output = utils.mod_launch(mod, args, expect=expect)
    res = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")
    data, expect = to_tvm_nd_array([data, expect])
    gpu_profiling(mod, data, expect, 400)

示例#8

0

显示文件

文件： test_fused_relu_grad.py 项目： x200510iong/akg

def test_fused_relu_grad(shape, c1=0, poly_sch=False):
    """Build, launch, verify and profile the fused_relu_grad kernel (float16).

    Args:
        shape: shape shared by the three inputs and the output buffer.
        c1: forwarded as the only op attribute and to ``compute_expect``.
        poly_sch: when True ``fused_relu_grad_auto`` is built with
            ``attrs={"target": "cuda"}``; otherwise ``fused_relu_grad_manual``.

    Raises:
        AssertionError: if the output does not match the expectation.
    """
    dtype = 'float16'
    # Named ``inputs`` rather than ``input`` so the builtin is not shadowed.
    inputs = gen_data(shape, dtype)
    expect = compute_expect(inputs, c1)
    shapes = [shape] * 3
    dtypes = [dtype] * 3
    attrs = [c1]
    if poly_sch:
        mod = utils.op_build(fused_relu_grad_auto,
                             shapes,
                             dtypes,
                             op_attrs=attrs,
                             attrs={"target": "cuda"})
    else:
        mod = utils.op_build(fused_relu_grad_manual,
                             shapes,
                             dtypes,
                             op_attrs=attrs)
    # NaN-filled buffer so untouched elements cannot pass the comparison.
    output = np.full(shape, np.nan, dtype)
    output = utils.mod_launch(mod, (*inputs, output), expect=expect)
    res = np.allclose(output, expect, rtol=5e-3, atol=1e-8)
    print("Test {}".format("Pass" if res else "Failed"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    data = to_tvm_nd_array(inputs)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *data, expect, 400)

示例#9

0

显示文件

def get_result(desc, poly, attrs=None):
    """Build a composite op from ``desc``, run it and compare with the reference.

    Args:
        desc: JSON string describing the op; its "process" field decides
            whether GPU profiling runs afterwards.
        poly: forwarded to ``composite.build``; when truthy it is also used
            as the default for the "enable_akg_reduce_lib" attribute.
        attrs: optional build attributes. An empty dict selects the default
            ``{'dim': "0 0 9728 9728"}`` attributes. When ``poly`` is truthy
            the passed dict gains the "enable_akg_reduce_lib" key if absent.

    Returns:
        True if every output matches its expectation, False otherwise.
    """
    if poly:
        # ``attrs`` defaults to None; previously this path raised
        # AttributeError on ``None.keys()``.
        if attrs is None:
            attrs = {}
        reduce_lib_key = "enable_akg_reduce_lib"
        if reduce_lib_key not in attrs:
            attrs[reduce_lib_key] = poly
    if attrs == {}:
        mod = composite.build(desc, {'dim':"0 0 9728 9728"}, poly=poly)
    else:
        mod = composite.build(desc, attrs, poly=poly)
    input_for_mod, expect, output_indexes = gen_json_data(desc)
    output = utils.mod_launch(mod, input_for_mod, output_indexes)

    rtol, atol = get_rtol_atol("FUSED", "float32")
    flag = True
    if len(output_indexes) > 1:
        # Several outputs: compare them pairwise with their expectations.
        if not all(map(lambda x, y: compare_tensor(x, y, rtol=rtol, atol=atol), output, expect)):
            logging.info(mod.imported_modules[0].get_source())
            flag = False
    else:
        if not compare_tensor(output, expect, rtol=rtol, atol=atol):
            logging.info(mod.imported_modules[0].get_source())
            flag = False
    desc_d = json.loads(desc)
    if desc_d["process"] == "cuda":
        inputs = to_tvm_nd_array(input_for_mod)
        expect = to_tvm_nd_array(expect)
        gpu_profiling(mod, *inputs, *expect, repeat_time=400)
    return flag

示例#10

0

显示文件

def test_fused_l2loss_grad(shape, layout, fill_data=4e-05, poly_sch=False):
    """Build, launch, verify and profile the fused_l2loss_grad kernel.

    Args:
        shape: shape shared by the float16 and float32 inputs.
        layout, fill_data: forwarded as op attributes and to ``compute_py``.
        poly_sch: must be True; only the build with
            ``attrs={"target": "cuda"}`` is defined in this function.

    Raises:
        NotImplementedError: when ``poly_sch`` is False (no module would be
            built in that case).
        AssertionError: if the output does not match the expectation.
    """
    if not poly_sch:
        # Without this guard ``mod`` is never assigned and the launch below
        # fails with an opaque UnboundLocalError.
        raise NotImplementedError(
            'fused_l2loss_grad is only built when poly_sch=True')

    data_1 = gen_data(shape, 'float16')
    data_2 = gen_data(shape, 'float32')

    expect, output = compute_py(data_1, data_2, layout, fill_data)
    input_list = [shape, shape]
    dtype_list = ['float16', 'float32']
    op_attrs = [layout, fill_data]
    mod = utils.op_build_test(fused_l2loss_grad,
                              input_list,
                              dtype_list,
                              kernel_name="fused_l2loss_grad",
                              op_attrs=op_attrs,
                              attrs={"target": "cuda"})

    args = [data_1, data_2, output]
    output = utils.mod_launch(mod, args, expect=expect)
    res = np.allclose(output, expect, rtol=5e-03, atol=1e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    data = to_tvm_nd_array([data_1, data_2])
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *data, expect, 400)

示例#11

0

显示文件

def get_result(desc, poly, attrs=None):
    """Build a composite op from ``desc``, run it and compare with the reference.

    Args:
        desc: op description passed to ``_get_backend``, ``composite.build``
            and ``gen_json_data``.
        poly: forwarded to ``composite.build``; when truthy it is also used
            as the default for the "enable_akg_reduce_lib" attribute.
        attrs: optional build attributes. For the "cuda" backend a falsy
            value is replaced by the result of ``_add_attrs_from_json``.
            When ``poly`` is truthy the dict gains the
            "enable_akg_reduce_lib" key if absent.

    Returns:
        False as soon as an output mismatches (profiling is skipped);
        True otherwise.
    """
    backend = _get_backend(desc)
    if backend == "cuda" and not attrs:
        attrs = _add_attrs_from_json(desc, attrs, poly)
    if poly:
        # ``attrs`` may still be None here (non-cuda backend with the default
        # argument); previously that raised AttributeError on ``None.keys()``.
        if attrs is None:
            attrs = {}
        reduce_lib_key = "enable_akg_reduce_lib"
        if reduce_lib_key not in attrs:
            attrs[reduce_lib_key] = poly

    # An empty/None attribute set is passed to the builder as None.
    build_attrs = attrs if attrs else None
    mod = composite.build(desc, build_attrs, poly=poly)

    input_for_mod, expect, output_indexes = gen_json_data(desc)
    output = utils.mod_launch(mod, input_for_mod, output_indexes)

    # Normalise single results to one-element lists so they compare pairwise.
    output_seq = output if isinstance(output, (list, tuple)) else [output]
    expect_seq = expect if isinstance(expect, (list, tuple)) else [expect]
    if not all(map(_compare_func, output_seq, expect_seq)):
        logging.info(mod.imported_modules[0].get_source())
        return False
    if backend == "cuda":
        inputs = to_tvm_nd_array(input_for_mod)
        expect = to_tvm_nd_array(expect)
        gpu_profiling(mod, *inputs, *expect, repeat_time=400)
    return True

示例#12

0

显示文件

def get_gpu_cycles(mod, *mod_args, device_id=0, save_log=False):
    """Profile ``mod`` on the GPU and return the profiler result as an int.

    The 'GPUProfilerInit' global function is called with an empty string,
    ``gpu_profiling`` runs with ``repeat_time=400`` on ``device_id``, and the
    value returned by 'GPUProfilerStop' is converted with ``int``.
    ``save_log`` is accepted but not used in this function.
    """
    profiler_init = tvm.get_global_func('GPUProfilerInit')
    profiler_init("")
    from akg.utils.result_analysis import gpu_profiling
    gpu_profiling(mod, *mod_args, repeat_time=400, device_id=device_id)
    profiler_stop = tvm.get_global_func('GPUProfilerStop')
    cycles = profiler_stop()
    return int(cycles)

示例#13

0

显示文件

文件： test_fused_relu_grad_bn_reduce_grad.py 项目： Airmondia/akg

def test_fused_relu_grad_bn_reduce_grad(shape_1,
                                        shape_2,
                                        layout='NHWC',
                                        poly_sch=False):
    """Build, launch, verify and profile fused_relu_grad_bn_reduce_grad.

    Args:
        shape_1: shape of the six float32 inputs.
        shape_2: shape of the three float16 inputs.
        layout: forwarded as the only op attribute and to ``compute_py``.
        poly_sch: when True the ``_auto`` variant is built with
            ``attrs={"target": "cuda"}``; otherwise the ``_manual`` variant.

    Raises:
        AssertionError: if the output does not match the expectation.
    """
    # Generation order is kept: six float32 tensors, then three float16 ones.
    tensors = [gen_data(shape_1, 'float32') for _ in range(6)]
    tensors += [gen_data(shape_2, 'float16') for _ in range(3)]

    expect, out_buf = compute_py(*tensors, layout)
    input_list = [shape_1] * 6 + [shape_2] * 3
    dtype_list = ['float32'] * 6 + ['float16'] * 3
    op_attrs = [layout]
    if not poly_sch:
        mod = utils.op_build_test(
            fused_relu_grad_bn_reduce_grad_manual,
            input_list,
            dtype_list,
            kernel_name="fused_relu_grad_bn_reduce_grad_manual",
            op_attrs=op_attrs)
    else:
        mod = utils.op_build_test(
            fused_relu_grad_bn_reduce_grad_auto,
            input_list,
            dtype_list,
            kernel_name="fused_relu_grad_bn_reduce_grad_auto",
            op_attrs=op_attrs,
            attrs={"target": "cuda"})
    result = utils.mod_launch(mod, tensors + [out_buf], expect=expect)
    passed = np.allclose(result, expect, rtol=5e-03, atol=1e-08)
    print("Test {}".format("Pass" if passed else "Failed"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    tvm_inputs = to_tvm_nd_array(list(tensors))
    tvm_expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *tvm_inputs, tvm_expect, 400)

示例#14

0

显示文件

def test_fused_relu_grad_bn_double_update_grad(shape_f16,
                                               shape_f32,
                                               layout='NHWC',
                                               poly_sch=False):
    """Build, launch, verify and profile fused_relu_grad_bn_double_update_grad.

    Args:
        shape_f16: shape of the float16 inputs.
        shape_f32: shape of the float32 inputs.
        layout: forwarded as the only op attribute and to ``compute_py``.
        poly_sch: when True the ``_auto`` variant is built with
            ``attrs={"target": "cuda"}``; otherwise the ``_manual`` variant.

    Raises:
        AssertionError: if any of the three outputs mismatches.
    """
    # (shape, dtype) of the seven inputs, in generation order.
    specs = [
        (shape_f32, 'float32'),
        (shape_f16, 'float16'),
        (shape_f32, 'float32'),
        (shape_f16, 'float16'),
        (shape_f16, 'float16'),
        (shape_f16, 'float16'),
        (shape_f16, 'float16'),
    ]
    data_list = [gen_data(shp, dt) for shp, dt in specs]
    shape_list = [shp for shp, _ in specs]
    dtype_list = [dt for _, dt in specs]
    data_tmp7, data_tmp15, data_tmp22, out_shape = compute_py(*data_list, layout)
    expect = [data_tmp7, data_tmp15, data_tmp22]
    # The same NaN-filled buffer is handed over for all three outputs.
    out_buf = np.full(out_shape, np.nan, 'float32')
    out_bufs = [out_buf] * 3

    if not poly_sch:
        mod = utils.op_build(fused_relu_grad_bn_double_update_grad_manual,
                             shape_list,
                             dtype_list,
                             op_attrs=[layout])
    else:
        mod = utils.op_build(fused_relu_grad_bn_double_update_grad_auto,
                             shape_list,
                             dtype_list,
                             op_attrs=[layout],
                             attrs={"target": "cuda"})

    results = utils.mod_launch(
        mod, (*data_list, *out_bufs),
        outputs=tuple(range(-len(out_bufs), 0)),
        expect=expect)

    # All three comparisons always run (no short-circuit).
    passed = True
    for idx in range(3):
        passed &= np.allclose(results[idx], expect[idx], rtol=5e-03, atol=1e-8)
    print("Test {}".format("Pass" if passed else "Fail"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    tvm_inputs = to_tvm_nd_array(data_list)
    tvm_expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *tvm_inputs, *tvm_expect, 400)

示例#15

0

显示文件

文件： test_ms_add.py 项目： wxyhv/akg

def test_ms_add(shape1, shape2, dtype, poly_sch=False):
    """Build, launch, verify and profile the add kernel.

    Args:
        shape1, shape2: shapes of the left and right operands.
        dtype: dtype of both operands.
        poly_sch: must be True; only the build with
            ``attrs={"target": "cuda"}`` is defined in this function.

    Raises:
        NotImplementedError: when ``poly_sch`` is False (no module would be
            built in that case).
        AssertionError: if the output does not match the expectation.
    """
    if not poly_sch:
        # Without this guard ``mod`` is never assigned and the launch below
        # fails with an opaque UnboundLocalError.
        raise NotImplementedError('add is only built when poly_sch=True')

    mod = utils.op_build_test(add, (shape1, shape2), (dtype, dtype), kernel_name="add", attrs={"target": "cuda"})

    lhs, rhs, output, expect = gen_data(shape1, shape2, dtype)
    output = utils.mod_launch(mod, (lhs, rhs, output), expect=expect)
    res = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")
    lhs, rhs, expect = to_tvm_nd_array([lhs, rhs, expect])
    gpu_profiling(mod, lhs, rhs, expect, 400)

示例#16

0

显示文件

def test_ms_tile(shape, multiples, dtype, poly_sch=False):
    """Build, launch, verify and profile the tile kernel.

    Args:
        shape: shape of the input tensor.
        multiples: forwarded as the only op attribute and to ``gen_data``.
        dtype: dtype of the input tensor.
        poly_sch: when True ``tile_auto`` is built with
            ``attrs={"target": "cuda"}``; otherwise ``tile_manual``.

    Raises:
        AssertionError: if the output does not match the expectation
            (NaNs compare equal).
    """
    if not poly_sch:
        mod = utils.op_build_test(tile_manual, [shape], [dtype], op_attrs=[multiples], kernel_name="tile_manual")
    else:
        mod = utils.op_build_test(tile_auto, [shape], [dtype], op_attrs=[multiples], kernel_name="tile_auto", attrs={"target": "cuda"})
    data, out_buf, expect = gen_data(shape, multiples, dtype)
    result = utils.mod_launch(mod, (data, out_buf), expect=expect)
    passed = compare_tensor(result, expect, rtol=5e-03, atol=1.e-8, equal_nan=True)
    print("Test {}".format("Pass" if passed else "Failed"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")
    tvm_data, tvm_expect = to_tvm_nd_array([data, expect])
    gpu_profiling(mod, tvm_data, tvm_expect, 400)

示例#17

0

显示文件

def test_ms_round(shape, dtype, poly_sch=False):
    """Build, launch, verify and profile the round kernel.

    Args:
        shape: shape of the input tensor.
        dtype: dtype of the input tensor.
        poly_sch: when True ``round_auto`` is built with
            ``attrs={"target": "cuda"}``; otherwise ``round_manual``.

    Raises:
        AssertionError: if the output does not match the expectation.
    """
    if not poly_sch:
        mod = utils.op_build_test(round_manual, [shape], [dtype], kernel_name="round_manual")
    else:
        mod = utils.op_build_test(round_auto, [shape], [dtype], attrs={"target": "cuda"}, kernel_name="round_auto")
    data, out_buf, expect = gen_data(shape, dtype)
    result = utils.mod_launch(mod, (data, out_buf), expect=expect)
    passed = np.allclose(result, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if passed else "Fail"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")
    tvm_data, tvm_expect = to_tvm_nd_array([data, expect])
    gpu_profiling(mod, tvm_data, tvm_expect, 400)

示例#18

0

显示文件

def test_ms_log(in_shape, in_dtype, poly_sch=False):
    """Build, launch, verify and profile the log kernel.

    Args:
        in_shape: shape of the input tensor.
        in_dtype: dtype of the input tensor.
        poly_sch: when True ``log_auto`` is built with
            ``attrs={"target":"cuda"}``; otherwise ``log_manual``.

    Raises:
        AssertionError: if the output does not match the expectation.
    """
    if not poly_sch:
        mod = utils.op_build_test(log_manual, (in_shape, ), (in_dtype, ), kernel_name="log_manual")
    else:
        mod = utils.op_build_test(log_auto, (in_shape, ), (in_dtype, ), kernel_name="log_auto", attrs={"target":"cuda"})
    data, out_buf, expect = gen_data(in_shape, in_dtype)
    result = utils.mod_launch(mod, (data, out_buf), expect=expect)
    # Absolute tolerance was relaxed from 1e-8 to 1e-7 for this op.
    passed = np.allclose(result, expect, rtol=5e-03, atol=1.e-7)
    print("Test {}".format("Pass" if passed else "Fail"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")
    tvm_data, tvm_expect = to_tvm_nd_array([data, expect])
    gpu_profiling(mod, tvm_data, tvm_expect, 400)

示例#19

0

显示文件

文件： test_ms_batch_matmul.py 项目： wxyhv/akg

def test_ms_bmm(shape1,
                shape2,
                dtype,
                out_dtype="float32",
                layout1="NHDT",
                layout2="NHDT",
                layout_out="NHDT",
                shape_bias=None,
                add_bias=False,
                tensor_core=True,
                poly_sch=False,
                dim="",
                bind_block="",
                bind_thread=""):
    """Build, launch, verify and profile the batch_matmul kernel.

    Args:
        shape1, shape2: shapes of the two matrix operands (dtype ``dtype``).
        dtype: dtype of both matrix operands.
        out_dtype: dtype given for the bias input; also an op attribute.
        layout1, layout2, layout_out: forwarded as op attributes and to
            ``gen_data``.
        shape_bias: shape given for the bias input.
        add_bias: forwarded as an op attribute and to ``gen_data``.
        tensor_core: op attribute; also sets "pragma_enable_tensor_core".
        poly_sch: must be True; only the cuda build is defined here.
        dim, bind_block, bind_thread: forwarded as build attributes.

    Raises:
        NotImplementedError: when ``poly_sch`` is False (no module would be
            built in that case).
        AssertionError: if the output does not match the expectation.
    """
    if not poly_sch:
        # Without this guard ``mod`` is never assigned and the launch below
        # fails with an opaque UnboundLocalError.
        raise NotImplementedError(
            'batch_matmul is only built when poly_sch=True')

    op_attrs = [out_dtype, layout1, layout2, layout_out, tensor_core, add_bias]

    mod = utils.op_build_test(batch_matmul, (shape1, shape2, shape_bias),
                              (dtype, dtype, out_dtype),
                              op_attrs=op_attrs,
                              attrs={
                                  "target": "cuda",
                                  "use_shared_memory": True,
                                  "pragma_enable_tensor_core": tensor_core,
                                  "enable_auto_fuse": False,
                                  "dim": dim,
                                  "bind_block": bind_block,
                                  "bind_thread": bind_thread,
                                  "vector_load_type": "float4",
                                  "pragma_enable_matmul": True
                              },
                              kernel_name="batch_matmul")

    lhs, rhs, bias, output, expect = gen_data(shape1, shape2, dtype, out_dtype,
                                              layout1, layout2, layout_out,
                                              shape_bias, add_bias)
    args = (lhs, rhs, bias, output)
    output = utils.mod_launch(mod, args, expect=expect)
    res = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    lhs, rhs, bias, expect = to_tvm_nd_array([lhs, rhs, bias, expect])
    gpu_profiling(mod, lhs, rhs, bias, expect, repeat_time=10000)

示例#20

0

显示文件

文件： test_ms_expand_dims.py 项目： Airmondia/akg

def test_expand_dims(shape1, axis, dtype, poly_sch=False):
    """Build, launch, verify and profile the expand_dims kernel.

    Args:
        shape1: shape of the input tensor.
        axis: forwarded as the only op attribute and to ``gen_data``.
        dtype: dtype of the input tensor.
        poly_sch: when True ``expand_dims_auto`` is built with
            ``attrs={"target": "cuda"}``; otherwise ``expand_dims_manual``.

    Raises:
        AssertionError: if the output does not match the expectation.
    """
    if not poly_sch:
        mod = utils.op_build_test(expand_dims_manual, [shape1], [dtype], op_attrs=[axis], kernel_name="expand_dims_manual")
    else:
        mod = utils.op_build_test(expand_dims_auto, [shape1], [dtype], op_attrs=[axis], attrs={"target": "cuda"}, kernel_name="expand_dims_auto")
    expect, input1, out_buf = gen_data(axis, dtype, shape1)
    result = utils.mod_launch(mod, (input1, out_buf), expect=expect)
    passed = np.allclose(result, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if passed else "Fail"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")
    tvm_input, tvm_expect = to_tvm_nd_array([input1, expect])
    gpu_profiling(mod, tvm_input, tvm_expect, 400)

示例#21

0

显示文件

文件： test_ms_minimum.py 项目： zhuyawen/akg

def test_ms_minimum(shape1, shape2, dtype, poly_sch=False):
    """Build, launch, verify and profile the minimum kernel.

    Args:
        shape1, shape2: shapes of the left and right operands.
        dtype: dtype of both operands.
        poly_sch: when True ``minimum_auto`` is built with
            ``attrs={"target": "cuda"}``; otherwise ``minimum_manual``.

    Raises:
        AssertionError: if the output does not match the expectation.
    """
    if not poly_sch:
        mod = utils.op_build(minimum_manual, (shape1, shape2), (dtype, dtype))
    else:
        mod = utils.op_build(minimum_auto, (shape1, shape2), (dtype, dtype), attrs={"target": "cuda"})
    lhs, rhs, out_buf, expect = gen_data(shape1, shape2, dtype)
    result = utils.mod_launch(mod, (lhs, rhs, out_buf), expect=expect)
    passed = compare_tensor(result, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if passed else "Fail"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    tvm_lhs, tvm_rhs, tvm_expect = to_tvm_nd_array([lhs, rhs, expect])
    gpu_profiling(mod, tvm_lhs, tvm_rhs, tvm_expect, 400)

示例#22

0

显示文件

文件： test_ms_cast.py 项目： wxyhv/akg

def test_ms_cast(shape, srcType, dstType, poly_sch=False):
    """Build, launch, verify and profile the cast kernel.

    Args:
        shape: shape of the input tensor.
        srcType: dtype of the input tensor.
        dstType: passed to the builder in a one-element list and to
            ``gen_data``.
        poly_sch: must be True; only the build with
            ``attrs={"target": "cuda"}`` is defined in this function.

    Raises:
        NotImplementedError: when ``poly_sch`` is False (no module would be
            built in that case).
        AssertionError: if the output does not match the expectation.
    """
    if not poly_sch:
        # Without this guard ``mod`` is never assigned and the launch below
        # fails with an opaque UnboundLocalError.
        raise NotImplementedError('cast is only built when poly_sch=True')

    mod = utils.op_build_test(cast, [shape], [srcType], [dstType],
                              attrs={"target": "cuda"},
                              kernel_name="cast")

    output, expect, inputs = gen_data(shape, srcType, dstType)
    output = utils.mod_launch(mod, (inputs, output), expect=expect)
    res = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    inputs, expect = to_tvm_nd_array([inputs, expect])
    gpu_profiling(mod, inputs, expect, 400)

示例#23

0

显示文件

文件： test_ms_equal.py 项目： Airmondia/akg

def test_ms_equal(shapes, dtype, poly_sch=False):
    """Build, launch, verify and profile the equal kernel.

    When both shapes are identical, a second launch compares the first input
    with itself and is verified as well.

    Args:
        shapes: the two operand shapes.
        dtype: dtype of both operands.
        poly_sch: when True ``equal_auto`` is built with
            ``attrs={"target": "cuda"}``; otherwise ``equal_manual``.

    Raises:
        AssertionError: if any launched output mismatches its expectation.
    """
    if poly_sch:
        mod = utils.op_build_test(equal_auto,
                                  shapes, [dtype, dtype],
                                  kernel_name="equal_auto",
                                  attrs={"target": "cuda"})
    else:
        mod = utils.op_build_test(equal_manual,
                                  shapes, [dtype, dtype],
                                  kernel_name="equal_manual")
    inputs1, output1, expect1 = gen_data(shapes, dtype)
    output1 = utils.mod_launch(mod, (*inputs1, output1), expect=expect1)

    if shapes[0] == shapes[1]:
        # Second case: an operand compared with itself.
        inputs2 = [inputs1[0], inputs1[0]]
        expect2 = np.equal(inputs2[0], inputs2[1])
        output2 = np.full(expect2.shape, 0, bool)
        # Pass this launch's own reference; it was previously given expect1.
        output2 = utils.mod_launch(mod, (*inputs2, output2), expect=expect2)

        res = np.allclose(output1, expect1, rtol=5e-03,
                          atol=1.e-8) and np.allclose(
                              output2, expect2, rtol=5e-03, atol=1.e-8)
        print("Test {}".format("Pass" if res else "Fail"))
        if not res:
            print("Error cuda:========================")
            print(mod.imported_modules[0].get_source())
            raise AssertionError("Test fail")

        inputs1 = to_tvm_nd_array(inputs1)
        inputs2 = to_tvm_nd_array(inputs2)
        expect1 = to_tvm_nd_array(expect1)
        expect2 = to_tvm_nd_array(expect2)
        # NOTE(review): both argument sets go into one profiling call -
        # confirm this is what gpu_profiling expects.
        gpu_profiling(mod, *inputs1, expect1, *inputs2, expect2, 400)
    else:
        res = np.allclose(output1, expect1, rtol=5e-03, atol=1.e-8)
        print("Test {}".format("Pass" if res else "Fail"))
        if not res:
            print("Error cuda:========================")
            print(mod.imported_modules[0].get_source())
            raise AssertionError("Test fail")

        inputs1 = to_tvm_nd_array(inputs1)
        expect1 = to_tvm_nd_array(expect1)
        gpu_profiling(mod, *inputs1, expect1, 400)

示例#24

0

显示文件

文件： test_ms_trans_data.py 项目： wxyhv/akg

def test_ms_trans_data(shape, axes, dtype, poly_sch=False):
    """Build, launch, verify and profile the trans_data kernel.

    Args:
        shape: shape of the input tensor.
        axes: forwarded as the only op attribute and to ``gen_data``.
        dtype: dtype of the input tensor.
        poly_sch: must be True; only the build with
            ``attrs={"target": "cuda"}`` is defined in this function.

    Raises:
        NotImplementedError: when ``poly_sch`` is False (no module would be
            built in that case).
        AssertionError: if the output does not match the expectation
            (NaNs compare equal).
    """
    if not poly_sch:
        # Without this guard ``mod`` is never assigned and the launch below
        # fails with an opaque UnboundLocalError.
        raise NotImplementedError(
            'trans_data is only built when poly_sch=True')

    mod = utils.op_build_test(trans_data, [shape], [dtype],
                              op_attrs=[axes],
                              kernel_name="trans_data",
                              attrs={"target": "cuda"})

    data, output, expect = gen_data(shape, axes, dtype)
    output = utils.mod_launch(mod, (data, output), expect=expect)
    ret = compare_tensor(output,
                         expect,
                         rtol=5e-03,
                         atol=1.e-8,
                         equal_nan=True)
    print("Test {}".format("Pass" if ret else "Failed"))
    if not ret:
        # A mismatch used to be printed only; now it fails the test like the
        # sibling operator tests do.
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")
    data, expect = to_tvm_nd_array([data, expect])
    gpu_profiling(mod, data, expect, 400)

示例#25

0

显示文件

文件： test_ms_rsqrt.py 项目： zhuyawen/akg

def test_ms_rsqrt(shape1, dtype, poly_sch=False):
    """Build, launch, verify and profile the rsqrt kernel.

    Args:
        shape1: shape of the input tensor.
        dtype: dtype of the input tensor.
        poly_sch: when True ``rsqrt_auto`` is built with
            ``attrs={"target": "cuda"}``; otherwise ``rsqrt_manual``.

    Raises:
        AssertionError: if the output does not match the expectation.
    """
    if not poly_sch:
        mod = utils.op_build(rsqrt_manual, (shape1,), (dtype,))
    else:
        mod = utils.op_build(rsqrt_auto, (shape1,), (dtype,), attrs={"target": "cuda"})
    expect, input1, out_buf = gen_data(dtype, shape1)
    result = utils.mod_launch(mod, (input1, out_buf), expect=expect)
    passed = np.allclose(result, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if passed else "Fail"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    tvm_input, tvm_expect = to_tvm_nd_array([input1, expect])
    gpu_profiling(mod, tvm_input, tvm_expect, 400)

示例#26

0

显示文件

def test_fused_is_finite(shape, layout='NHWC', poly_sch=False):
    """Build the fused is-finite kernel, check its output and profile it.

    Args:
        shape: Sole entry of the shape list given to ``utils.op_build``;
            also forwarded to ``gen_data``.
        layout: Passed as the single op attribute and forwarded to
            ``gen_data``.
        poly_sch: When truthy, ``fused_is_finite_auto`` is built with the
            ``{"target": "cuda"}`` attrs; otherwise
            ``fused_is_finite_manual`` is built with no extra attrs.

    Raises:
        AssertionError: If the launched output is not close to ``expect``.
    """
    build_kwargs = {"op_attrs": [layout]}
    if poly_sch:
        kernel = fused_is_finite_auto
        build_kwargs["attrs"] = {"target": "cuda"}
    else:
        kernel = fused_is_finite_manual
    mod = utils.op_build(kernel, [shape], ['float32'], **build_kwargs)

    data, expect, out_buf = gen_data(shape, 'float32', layout)
    result = utils.mod_launch(mod, (data, out_buf), expect=expect)

    passed = np.allclose(result, expect, rtol=5e-03, atol=1e-8)
    verdict = "Pass" if passed else "Fail"
    print("Test {}".format(verdict))
    if not passed:
        # Dump the generated source of the first imported module to help
        # diagnose the mismatch before failing.
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    data, expect = to_tvm_nd_array([data, expect])
    gpu_profiling(mod, data, expect, 400)

示例#27

0

显示文件

def test_ms_one_hot(shape, depth, dtype, on_value, off_value, axis, poly_sch=False):
    """Build the one-hot kernel, check its output and profile it.

    Args:
        shape: Sole entry of the shape list given to ``utils.op_build``;
            also forwarded to ``gen_data``.
        depth: Forwarded as an op attribute and to ``gen_data``.
        dtype: Sole entry of the dtype list, an op attribute, and the type
            the generated input is cast to.
        on_value: Forwarded as an op attribute and to ``gen_data``.
        off_value: Forwarded as an op attribute and to ``gen_data``.
        axis: Forwarded as an op attribute and to ``gen_data``.
        poly_sch: When truthy, ``one_hot_auto`` is built with the
            ``{"target": "cuda"}`` attrs; otherwise ``one_hot_manual`` is
            built with no extra attrs.

    Raises:
        AssertionError: If ``compare_tensor`` reports a mismatch.
    """
    build_kwargs = {"op_attrs": [on_value, off_value, depth, axis, dtype]}
    if poly_sch:
        kernel = one_hot_auto
        build_kwargs["attrs"] = {"target": "cuda"}
    else:
        kernel = one_hot_manual
    mod = utils.op_build(kernel, [shape], [dtype], **build_kwargs)

    # gen_data also returns on/off value tensors; they are not used here.
    expect, raw_data, _on_tensor, _off_tensor, out_buf = gen_data(
        axis, depth, dtype, shape, on_value, off_value)
    data = raw_data.astype(dtype)
    result = utils.mod_launch(mod, (data, out_buf), expect=expect)

    ok = compare_tensor(result, expect, rtol=5e-03, atol=1.e-8, equal_nan=True)
    verdict = "Pass" if ok else "Failed"
    print("Test {}".format(verdict))
    if not ok:
        # Dump the generated source of the first imported module to help
        # diagnose the mismatch before failing.
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    data, expect = to_tvm_nd_array([data, expect])
    gpu_profiling(mod, data, expect, 400)

示例#28

0

显示文件

文件： test_ms_divide.py 项目： zhuyawen/akg

def test_ms_divide(shape, dtype, poly_sch=False):
    """Build the divide kernel, check its output and profile it.

    Args:
        shape: Used for both entries of the shape list given to
            ``utils.op_build``; also forwarded to ``gen_data``.
        dtype: Used for both entries of the dtype list given to
            ``utils.op_build``; also forwarded to ``gen_data``.
        poly_sch: When truthy, ``divide_auto`` is built with the
            ``{"target": "cuda"}`` attrs; otherwise ``divide_manual`` is
            built with no extra attrs.

    Raises:
        AssertionError: If ``compare_tensor`` reports a mismatch.
    """
    if poly_sch:
        mod = utils.op_build(divide_auto, [shape, shape], [dtype, dtype],
                             attrs={"target": "cuda"})
    else:
        mod = utils.op_build(divide_manual, [shape, shape], [dtype, dtype])
    lhs, rhs, output, expect = gen_data(shape, dtype)
    output = utils.mod_launch(mod, (lhs, rhs, output), expect=expect)
    ret = compare_tensor(output,
                         expect,
                         rtol=5e-03,
                         atol=1.e-8,
                         equal_nan=True)
    print("Test {}".format("Pass" if ret else "Failed"))
    if not ret:
        print("Error cuda:==========================")
        # Was misspelled `get_soure`, which raised AttributeError here and
        # masked the intended AssertionError on a failing comparison.
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")
    lhs, rhs, expect = to_tvm_nd_array([lhs, rhs, expect])
    # NOTE(review): 400 is presumably the profiling repeat count -- confirm
    # against gpu_profiling's signature.
    gpu_profiling(mod, lhs, rhs, expect, 400)

示例#29

0

显示文件

def test_ms_addn(shape, dtype, n, poly_sch=False):
    """Build the addn kernel for ``n`` inputs, check its output and profile it.

    Args:
        shape: Shape repeated ``n`` times to form the input shape list;
            also forwarded to ``gen_data``.
        dtype: Sole entry of the dtype list given to
            ``utils.op_build_test``; also forwarded to ``gen_data``.
        n: Number of times ``shape`` is repeated; also forwarded to
            ``gen_data``.
        poly_sch: When truthy, ``addn_auto`` is built with the
            ``{"target": "cuda"}`` attrs; otherwise ``addn_manual`` is
            built with no extra attrs.

    Raises:
        AssertionError: If ``compare_tensor`` reports a mismatch.
    """
    shapes = [shape for _ in range(n)]

    if poly_sch:
        kernel = addn_auto
        build_kwargs = {"attrs": {"target": "cuda"}, "kernel_name": "addn_auto"}
    else:
        kernel = addn_manual
        build_kwargs = {"kernel_name": "addn_manual"}
    mod = utils.op_build_test(kernel, [shapes], [dtype], **build_kwargs)

    expect, inputs, out_buf = gen_data(shape, shapes, dtype, n)
    result = utils.mod_launch(mod, (*inputs, out_buf), expect=expect)

    passed = compare_tensor(result, expect, rtol=5e-03, atol=1.e-8)
    verdict = "Pass" if passed else "Fail"
    print("Test {}".format(verdict))
    if not passed:
        # Dump the generated source of the first imported module to help
        # diagnose the mismatch before failing.
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    inputs = to_tvm_nd_array(inputs)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *inputs, expect, 400)

示例#30

0

显示文件

def test_fused_bn_update(shape,
                         dtype="float32",
                         c1=(1 / (256 * 7 * 7)),
                         c2=1.001e-05,
                         c3=1.00007975,
                         c4=0.100000024,
                         poly_sch=False):
    """Build the fused bn-update kernel, check its outputs and profile it.

    Args:
        shape: Forwarded to ``gen_data`` and used for the output buffers.
        dtype: Used for all four input dtypes, the output buffers, and as
            the first op attribute.
        c1: Constant forwarded to ``compute_expect`` and as an op attribute.
        c2: Constant forwarded to ``compute_expect`` and as an op attribute.
        c3: Constant forwarded to ``compute_expect`` and as an op attribute.
        c4: Constant forwarded to ``compute_expect`` and as an op attribute.
        poly_sch: When truthy, ``fused_bn_update_auto`` is built with the
            ``{"target": "cuda"}`` attrs; otherwise
            ``fused_bn_update_manual`` is built with no extra attrs.

    Raises:
        AssertionError: If the launched outputs are not close to ``expect``.
    """
    in_arrays = gen_data(shape, dtype)
    expect = compute_expect(in_arrays, c1, c2, c3, c4)

    op_attrs = [dtype, c1, c2, c3, c4]
    in_shapes = [in_arrays[0].shape] * 4
    in_dtypes = [dtype] * 4

    if poly_sch:
        kernel = fused_bn_update_auto
        build_kwargs = {"kernel_name": "fused_bn_update_auto",
                        "op_attrs": op_attrs,
                        "attrs": {"target": "cuda"}}
    else:
        kernel = fused_bn_update_manual
        build_kwargs = {"kernel_name": "fused_bn_update_manual",
                        "op_attrs": op_attrs}
    mod = utils.op_build_test(kernel, in_shapes, in_dtypes, **build_kwargs)

    # One NaN-filled placeholder referenced three times, exactly as before.
    placeholder = np.full(shape, np.nan, dtype)
    out_bufs = [placeholder] * 3
    launch_args = in_arrays + out_bufs
    # Negative indices select the trailing output buffers of launch_args.
    out_indices = range(-len(out_bufs), 0)
    result = utils.mod_launch(mod,
                              launch_args,
                              outputs=out_indices,
                              expect=expect)

    passed = np.allclose(result, expect, rtol=5e-03, atol=1.e-8)
    verdict = "Pass" if passed else "Failed"
    print("Test {}".format(verdict))
    if not passed:
        # Dump the generated source of the first imported module to help
        # diagnose the mismatch before failing.
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    data = to_tvm_nd_array(in_arrays)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *data, *expect, 400)