def test_fused_relu_grad_bn_update_grad(shape, out_shape, dtype="float16", layout="NHWC", out_dtype="float32", poly_sch=False):
    """Build, launch, verify and profile the fused relu_grad/bn_update_grad kernel.

    The ``_auto`` kernel is built with ``{"target": "cuda"}`` when
    ``poly_sch`` is true, otherwise the ``_manual`` kernel is built. The two
    launched outputs are compared against the reference produced by
    ``gen_data``.

    Raises:
        AssertionError: if the outputs do not match the reference; the
            generated source is printed first.
    """
    build_shapes = [out_shape, shape, shape, shape]
    build_dtypes = [out_dtype, dtype, dtype, dtype]
    layout_attr = [layout]
    if not poly_sch:
        mod = utils.op_build(fused_relu_grad_bn_update_grad_manual, build_shapes, build_dtypes, op_attrs=layout_attr)
    else:
        mod = utils.op_build(fused_relu_grad_bn_update_grad_auto,
                             build_shapes,
                             build_dtypes,
                             op_attrs=layout_attr,
                             attrs={"target": "cuda"})

    head, data_sum, in_bn, in_active, output, expect = gen_data(shape, out_shape, dtype, out_dtype, layout)
    inputs = [data_sum, in_bn, head, in_active]
    # The same buffer object is handed over for both output slots.
    out_buffers = [output, output]
    out_indices = tuple(range(-len(out_buffers), 0))
    outputs = utils.mod_launch(mod, inputs + out_buffers, outputs=out_indices, expect=expect)

    passed = np.allclose(outputs, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if passed else "Fail"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    inputs = to_tvm_nd_array(inputs)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *inputs, *expect, 400)
Пример #2
0
def test_fused_bn_double_follow_relu(in_shape, in_dtype='float16', layout='NHWC', out_dtype='float16', poly_sch=False):
    """Build, launch, verify and profile the fused bn_double_follow_relu kernel.

    Only the "NHWC" and "NCHW" layouts are accepted. The ``_auto`` kernel
    (built with ``{"target": "cuda"}``) is used when ``poly_sch`` is true,
    the ``_manual`` kernel otherwise.

    Raises:
        NotImplementedError: if ``layout`` is neither "NHWC" nor "NCHW".
        AssertionError: if the kernel output does not match the reference.
    """
    if layout not in ("NHWC", "NCHW"):
        raise NotImplementedError(
            'Layout not supported {} '.format(layout))

    inter_dtype = 'float32'
    inputs, output, expect = gen_data(in_shape, in_dtype, inter_dtype, layout, out_dtype)
    shapes = [tensor.shape for tensor in inputs]
    # Two groups of four intermediate-precision tensors followed by one
    # tensor in the input dtype.
    group_dtypes = [inter_dtype] * 4 + [in_dtype]
    dtypes = group_dtypes + group_dtypes
    op_attrs = [layout, out_dtype]

    if not poly_sch:
        mod = utils.op_build(fused_bn_double_follow_relu_manual, shapes, dtypes, op_attrs=op_attrs)
    else:
        mod = utils.op_build(fused_bn_double_follow_relu_auto, shapes, dtypes,
                             op_attrs=op_attrs, attrs={"target": "cuda"})

    launch_args = inputs + [output]
    output = utils.mod_launch(mod, launch_args, expect=expect)

    passed = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if passed else "Fail"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    inputs = to_tvm_nd_array(inputs)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *inputs, expect, 400)
Пример #3
0
def test_fused_l2loss_grad(shape, layout, fill_data=4e-05, poly_sch=False):
    """Build, launch, verify and profile the fused l2loss_grad kernel.

    Two inputs of the same ``shape`` are generated, one float16 and one
    float32. The reference result and the output buffer come from
    ``compute_py``.

    Raises:
        AssertionError: if the kernel output does not match the reference.
    """
    half_input = gen_data(shape, 'float16')
    single_input = gen_data(shape, 'float32')
    expect, output = compute_py(half_input, single_input, layout, fill_data)

    shapes = [shape, shape]
    dtypes = ['float16', 'float32']
    op_attrs = [layout, fill_data]
    if not poly_sch:
        mod = utils.op_build(fused_l2loss_grad_manual, shapes, dtypes, op_attrs=op_attrs)
    else:
        mod = utils.op_build(fused_l2loss_grad_auto, shapes, dtypes,
                             op_attrs=op_attrs, attrs={"target": "cuda"})

    output = utils.mod_launch(mod, [half_input, single_input, output], expect=expect)
    passed = np.allclose(output, expect, rtol=5e-03, atol=1e-8)
    print("Test {}".format("Pass" if passed else "Fail"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    data = to_tvm_nd_array([half_input, single_input])
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *data, expect, 400)
Пример #4
0
def test_fused_relu_grad(shape, c1=0, poly_sch=False):
    """Build, launch, verify and profile the fused relu_grad kernel in float16.

    The output buffer is pre-filled with NaN before launch.

    Raises:
        AssertionError: if the kernel output does not match the reference
            from ``compute_expect``.
    """
    dtype = 'float16'
    operands = gen_data(shape, dtype)
    expect = compute_expect(operands, c1)

    shapes = [shape] * 3
    dtypes = [dtype] * 3
    op_attrs = [c1]
    if not poly_sch:
        mod = utils.op_build(fused_relu_grad_manual, shapes, dtypes, op_attrs=op_attrs)
    else:
        mod = utils.op_build(fused_relu_grad_auto, shapes, dtypes,
                             op_attrs=op_attrs, attrs={"target": "cuda"})

    out_buffer = np.full(shape, np.nan, dtype)
    output = utils.mod_launch(mod, (*operands, out_buffer), expect=expect)
    passed = np.allclose(output, expect, rtol=5e-3, atol=1e-8)
    print("Test {}".format("Pass" if passed else "Failed"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    data = to_tvm_nd_array(operands)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *data, expect, 400)
Пример #5
0
def test_fused_pad(shape,
                   pad_before,
                   pad_after,
                   layout='NHWC',
                   pad_value=0.0,
                   poly_sch=False):
    """Build, launch, verify and profile the fused pad kernel on float32 data.

    Raises:
        AssertionError: if the kernel output does not match the reference
            from ``gen_data``.
    """
    op_attrs = [pad_before, pad_after, layout, pad_value]
    if not poly_sch:
        mod = utils.op_build(fused_pad_manual, [shape], ['float32'],
                             op_attrs=op_attrs)
    else:
        mod = utils.op_build(fused_pad_auto, [shape], ['float32'],
                             op_attrs=op_attrs,
                             attrs={"target": "cuda"})

    data, out_buffer, expect = gen_data(shape, pad_before, pad_after, layout,
                                        pad_value)
    output = utils.mod_launch(mod, (data, out_buffer), expect=expect)
    passed = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if passed else "Fail"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    data = to_tvm_nd_array(data)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, data, expect, 400)
Пример #6
0
def cholesky_run(shape, dtype, attrs):
    """Build the cholesky kernel and either run it or hand it back for tuning.

    Returns:
        * no ``'tuning'`` key in ``attrs``: ``(inputs, acu_output, exp_output,
          TestCase_Result)`` after launching and comparing the kernel;
        * ``'tuning'`` key present and truthy: ``(mod, exp_output,
          (inputs, output))``;
        * ``'tuning'`` key present and falsy: ``mod`` only.
    """
    if 'tuning' not in attrs.keys():
        mod = utils.op_build(cholesky.cholesky, [shape], [dtype],
                             kernel_name='cholesky',
                             attrs=attrs)
        exp_output, inputs, output = gen_data(dtype, shape)
        acu_output = utils.mod_launch(mod, (inputs, output), expect=exp_output)
        # NaNs at matching positions count as equal.
        TestCase_Result = np.allclose(acu_output,
                                      exp_output,
                                      rtol=5e-03,
                                      equal_nan=True)
        return inputs, acu_output, exp_output, TestCase_Result

    tuning = attrs.get("tuning", False)
    kernel_name = attrs.get("kernel_name", False)
    mod = utils.op_build(cholesky.cholesky, [shape], [dtype],
                         kernel_name=kernel_name,
                         attrs=attrs,
                         tuning=tuning)
    if not tuning:
        return mod
    exp_output, inputs, output = gen_data(dtype, shape)
    return mod, exp_output, (inputs, output)
Пример #7
0
def test_fused_mul_div_rsqrt_mul_isfinite_red(shape,
                                              dtype='float32',
                                              poly_sch=False):
    """Build, launch, verify and profile the fused mul/div/rsqrt/mul/isfinite kernel.

    The kernel has four outputs: a one-element bool buffer followed by three
    buffers of ``shape`` pre-filled with NaN. Each output is checked
    against the matching entry of ``compute_expect``'s result.

    Raises:
        AssertionError: if any output does not match its reference.
    """
    operands = gen_data(shape, dtype)
    expect = compute_expect(operands)

    shapes = [shape, shape]
    dtypes = [dtype, dtype]
    if not poly_sch:
        mod = utils.op_build(fused_mul_div_rsqrt_mul_isfinite_red_manual,
                             shapes, dtypes)
    else:
        mod = utils.op_build(fused_mul_div_rsqrt_mul_isfinite_red_auto,
                             shapes,
                             dtypes,
                             attrs={"target": "cuda"})

    flag_buffer = np.full((1, ), False, 'bool')
    # The three value slots share a single NaN-filled buffer object.
    outputs = [flag_buffer] + [np.full(shape, np.nan, dtype)] * 3
    output = utils.mod_launch(mod, [*operands, *outputs],
                              outputs=list(range(-len(outputs), 0)),
                              expect=expect)

    # Every output is compared, even after an earlier mismatch.
    ret = compare_tensor(output[0], expect[0], rtol=5e-03, atol=1.e-08)
    for idx in (1, 2, 3):
        ret &= compare_tensor(output[idx], expect[idx], rtol=5e-03, atol=1.e-08)
    print("Test {}".format("Pass" if ret else "Failed"))
    if not ret:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    data = to_tvm_nd_array(operands)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *data, *expect, 400)
Пример #8
0
def test_fused_relu_grad_bn_double_update_grad(shape_f16,
                                               shape_f32,
                                               layout='NHWC',
                                               poly_sch=False):
    """Build, launch, verify and profile the fused relu_grad/bn_double_update_grad kernel.

    Seven inputs are generated (float32 at positions 0 and 2, float16
    elsewhere). ``compute_py`` yields three reference tensors plus the output
    shape, and the three kernel outputs are checked one by one.

    Raises:
        AssertionError: if any output does not match its reference.
    """
    shape_list = [shape_f32, shape_f16, shape_f32] + [shape_f16] * 4
    dtype_list = ['float32', 'float16', 'float32'] + ['float16'] * 4
    # Inputs are generated in positional order, one gen_data call per tensor.
    data_list = [gen_data(shp, dt) for shp, dt in zip(shape_list, dtype_list)]

    ref_a, ref_b, ref_c, out_shape = compute_py(*data_list, layout)
    expect = [ref_a, ref_b, ref_c]
    # All three output slots reference the same NaN-filled buffer object.
    out_buffers = [np.full(out_shape, np.nan, 'float32')] * 3

    if not poly_sch:
        mod = utils.op_build(fused_relu_grad_bn_double_update_grad_manual,
                             shape_list,
                             dtype_list,
                             op_attrs=[layout])
    else:
        mod = utils.op_build(fused_relu_grad_bn_double_update_grad_auto,
                             shape_list,
                             dtype_list,
                             op_attrs=[layout],
                             attrs={"target": "cuda"})

    output = utils.mod_launch(
        mod, (*data_list, *out_buffers),
        outputs=tuple(range(-len(out_buffers), 0)),
        expect=expect)

    res = True
    for idx in range(3):
        res &= np.allclose(output[idx], expect[idx], rtol=5e-03, atol=1e-8)
    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    data_list = to_tvm_nd_array(data_list)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *data_list, *expect, 400)
def test_fused_relu_grad_bn_reduce_grad(shape_1,
                                        shape_2,
                                        layout='NHWC',
                                        poly_sch=False):
    """Build, launch, verify and profile the fused relu_grad/bn_reduce_grad kernel.

    Nine inputs are generated: six float32 tensors of ``shape_1`` followed by
    three float16 tensors of ``shape_2``. The reference result and the output
    buffer come from ``compute_py``.

    Raises:
        AssertionError: if the kernel output does not match the reference.
    """
    input_list = [shape_1] * 6 + [shape_2] * 3
    dtype_list = ['float32'] * 6 + ['float16'] * 3
    # Inputs are generated in positional order, one gen_data call per tensor.
    tensors = [gen_data(shp, dt) for shp, dt in zip(input_list, dtype_list)]

    expect, output = compute_py(*tensors, layout)

    op_attrs = [layout]
    if not poly_sch:
        mod = utils.op_build(fused_relu_grad_bn_reduce_grad_manual,
                             input_list,
                             dtype_list,
                             op_attrs=op_attrs)
    else:
        mod = utils.op_build(fused_relu_grad_bn_reduce_grad_auto,
                             input_list,
                             dtype_list,
                             op_attrs=op_attrs,
                             attrs={"target": "cuda"})

    output = utils.mod_launch(mod, tensors + [output], expect=expect)
    passed = np.allclose(output, expect, rtol=5e-03, atol=1e-08)
    print("Test {}".format("Pass" if passed else "Failed"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    inputs = to_tvm_nd_array(list(tensors))
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *inputs, expect, 400)
Пример #10
0
def test_ms_select(shape_cond, shape_x, dtype_cond, dtype_x, poly_sch=False):
    """Build, launch and verify the select kernel (condition plus two operands).

    Raises:
        AssertionError: if the kernel output does not match the reference
            from ``gen_data``.
    """
    shapes = [shape_cond, shape_x, shape_x]
    dtypes = [dtype_cond, dtype_x, dtype_x]
    if not poly_sch:
        mod = utils.op_build(select_manual, shapes, dtypes)
    else:
        mod = utils.op_build(select_auto, shapes, dtypes, attrs={"target": "cuda"})

    expect, cond, x1, x2, out_buffer = gen_data(shape_cond, shape_x, dtype_cond, dtype_x)
    output = utils.mod_launch(mod, (cond, x1, x2, out_buffer), expect=expect)
    passed = compare_tensor(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if passed else "Fail"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")
def test_ms_resize_grad(shape, size, dtype, align_corners, poly_sch=False):
    """Build, launch and verify the resize_nearest_neighbor_grad kernel.

    The ``_auto`` kernel (built with ``{"target": "cuda"}``) is used when
    ``poly_sch`` is true, the ``_manual`` kernel otherwise.

    Raises:
        AssertionError: if the kernel output does not match the reference
            from ``gen_data``.
    """
    op_attr = [size, align_corners]
    if poly_sch:
        mod = utils.op_build(resize_nearest_neighbor_grad_auto, [shape],
                             [dtype],
                             op_attr,
                             attrs={"target": "cuda"})
    else:
        mod = utils.op_build(resize_nearest_neighbor_grad_manual, [shape],
                             [dtype], op_attr)
    data, output, expect = gen_data(shape, size, align_corners, dtype)
    output = utils.mod_launch(mod, (data, output), expect=expect)
    compare_res = compare_tensor(output, expect, rtol=5e-03, atol=1e-08)
    # The comparison result used to be discarded, so a wrong kernel could
    # never fail this test. Report and raise like the sibling tests do.
    print("Test {}".format("Pass" if compare_res else "Fail"))
    if not compare_res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")
Пример #12
0
 def _compilewithjson_cuda(op_func):
     """Build ``op_func`` from the ``kernel_info`` description of the enclosing scope.

     Input shapes and data types are read from the first element of every
     ``kernel_info['input_desc']`` entry, and op attributes from the
     ``'value'`` field of every ``kernel_info['attr']`` entry (if any).
     IR/code dumping is enabled when the respective environment variable
     equals "on". Always returns True.
     """
     descs = kernel_info['input_desc']
     input_shapes = [desc[0]['shape'] for desc in descs]
     input_types = [desc[0]['data_type'] for desc in descs]
     # A missing/empty attribute list simply yields no op attributes.
     op_attrs = [ext_arg['value'] for ext_arg in (kernel_info['attr'] or [])]
     dump_ir = "on" == os.getenv(get_dump_ir_flag())
     dump_code = "on" == os.getenv(get_dump_code_flag())
     kernel_exec.op_build(op_func,
                          input_shapes,
                          input_types,
                          op_attrs,
                          kernel_info['op'],
                          attrs=attrs,
                          dump_ir=dump_ir,
                          dump_code=dump_code)
     return True
Пример #13
0
def test_ms_neg(shape, dtype, poly_sch=False):
    """Build, launch, verify and profile the neg kernel.

    Raises:
        AssertionError: if the kernel output does not match the reference
            from ``gen_data``.
    """
    if not poly_sch:
        mod = utils.op_build(neg_manual, [shape], [dtype])
    else:
        mod = utils.op_build(neg_auto, [shape], [dtype], attrs={"target": "cuda"})

    data, out_buffer, expect = gen_data(shape, dtype)
    output = utils.mod_launch(mod, (data, out_buffer), expect=expect)
    passed = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if passed else "Fail"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    data, expect = to_tvm_nd_array([data, expect])
    gpu_profiling(mod, data, expect, 400)
Пример #14
0
def batch_cholesky_trsm_run(shape1, shape2, dtype, attrs):
    """Build and launch the batch_cholesky_trsm kernel and compare with the reference.

    The first batch entry of the first input, of the kernel output and of the
    expected output is printed for inspection.

    Returns:
        ``(inputs1, acu_output, exp_output, TestCase_Result)`` where
        ``TestCase_Result`` is the ``np.allclose`` verdict (NaNs at matching
        positions count as equal).
    """
    mod = utils.op_build(batch_cholesky_trsm.batch_cholesky_trsm,
                         [shape1, shape2], [dtype, dtype],
                         kernel_name='batch_cholesky_trsm',
                         attrs=attrs)
    exp_output, inputs1, inputs2, out_buffer = gen_data(dtype, shape1, shape2)
    acu_output = utils.mod_launch(mod, (inputs1, inputs2, out_buffer))

    # Debug dump: first batch slice of input, actual and expected tensors.
    for tensor in (inputs1, acu_output, exp_output):
        print("====")
        print(tensor[0, :, :])

    TestCase_Result = np.allclose(acu_output,
                                  exp_output,
                                  rtol=5e-03,
                                  equal_nan=True)
    return inputs1, acu_output, exp_output, TestCase_Result
Пример #15
0
def triplet_loss_ad_run(shape, dtype, margin=12.0, kernel_name="triplet_loss_grad", attrs=None):
    """Build and run the triplet_loss AD kernel for each of its three inputs.

    Args:
        shape: shape of the anchor/positive/negative tensors.
        dtype: "float16" or "float32".
        margin: margin added to the forward loss.
        kernel_name: replaced by ``attrs["kernel_name"]`` in tuning mode; the
            non-tuning path always builds under the name 'triplet_loss_ad'.
        attrs: build attributes; ``None`` is treated as an empty dict.

    Returns:
        * ``'tuning'`` key present and truthy: ``(mod, expect,
          (grad, anchor, pos, neg, output))`` for input 0;
        * ``'tuning'`` key present and falsy: ``mod`` for input 0;
        * otherwise ``(grad, outputs, expects, assert_res)`` where ``outputs``
          and ``expects`` are tuples holding one entry per differentiated
          input (0 = anchor, 1 = positive, 2 = negative).
    """
    # Avoid a mutable default argument shared between calls.
    if attrs is None:
        attrs = {}

    support_list = {"float16": np.float16, "float32": np.float32}
    anchor = np.arange(np.prod(shape)).reshape(shape).astype(dtype)
    pos = anchor + 0.5
    neg = anchor + 2.0
    d_pos = np.sum((anchor - pos) * (anchor - pos), -1)
    d_neg = np.sum((anchor - neg) * (anchor - neg), -1)
    # Binarise the forward result: 0 where the loss is clamped, 1 elsewhere.
    output_forward = margin + d_pos - d_neg
    output_forward[output_forward < 0.0] = 0.0
    output_forward[output_forward > 0.0] = 1.0

    d_pos1 = anchor - pos
    d_neg1 = anchor - neg

    if 'tuning' in attrs.keys():
        t = attrs.get("tuning", False)
        kernel_name = attrs.get("kernel_name", False)
        grad = random_gaussian(shape[:-1], miu=1, sigma=0.1).astype(support_list[dtype])
        # NOTE(review): the original looped over range(3) but returned during
        # the first iteration, so only input 0 is ever built in tuning mode.
        input_id = 0
        mod = utils.op_build(triplet_loss_ad.triplet_loss_ad,
                             [grad.shape, shape, shape, shape], [dtype, dtype, dtype, dtype],
                             op_attrs=[margin, input_id], kernel_name=kernel_name, attrs=attrs, tuning=t)
        if not t:
            return mod
        expect, output = gen_data(d_neg1, d_pos1, dtype, grad, input_id, output_forward)
        return mod, expect, (grad, anchor, pos, neg, output)

    grad = random_gaussian(shape[:-1], miu=1, sigma=0.1).astype(support_list[dtype])
    assert_res = True
    output_all = []
    expect_all = []
    # Testing AD for 3 inputs of the triplet_loss op:
    # 0 - for "anchor_output"
    # 1 - for "positive_output"
    # 2 - for "negative_output"
    for input_id in range(3):
        mod = utils.op_build(triplet_loss_ad.triplet_loss_ad,
                             [grad.shape, shape, shape, shape], [dtype, dtype, dtype, dtype],
                             op_attrs=[margin, input_id], kernel_name='triplet_loss_ad', attrs=attrs)
        expect, output = gen_data(d_neg1, d_pos1, dtype, grad, input_id, output_forward)
        output = utils.mod_launch(mod, [grad, anchor, pos, neg, output])
        assert_res &= compare_tensor(output, expect, rtol=5e-03, atol=5e-2, equal_nan=True)
        output_all.append(output)
        expect_all.append(expect)

    # Return the results of all three runs; previously only the last
    # iteration's tensors were returned and the collected lists were unused.
    return grad, tuple(output_all), tuple(expect_all), assert_res
Пример #16
0
def test_ms_rsqrt(shape1, dtype, poly_sch=False):
    """Build, launch, verify and profile the rsqrt kernel.

    Raises:
        AssertionError: if the kernel output does not match the reference
            from ``gen_data``.
    """
    shapes = (shape1,)
    dtypes = (dtype,)
    if not poly_sch:
        mod = utils.op_build(rsqrt_manual, shapes, dtypes)
    else:
        mod = utils.op_build(rsqrt_auto, shapes, dtypes, attrs={"target": "cuda"})

    expect, input1, out_buffer = gen_data(dtype, shape1)
    output = utils.mod_launch(mod, (input1, out_buffer), expect=expect)
    passed = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if passed else "Fail"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    input1, expect = to_tvm_nd_array([input1, expect])
    gpu_profiling(mod, input1, expect, 400)
Пример #17
0
def test_ms_minimum(shape1, shape2, dtype, poly_sch=False):
    """Build, launch, verify and profile the two-operand minimum kernel.

    Raises:
        AssertionError: if the kernel output does not match the reference
            from ``gen_data``.
    """
    shapes = (shape1, shape2)
    dtypes = (dtype, dtype)
    if not poly_sch:
        mod = utils.op_build(minimum_manual, shapes, dtypes)
    else:
        mod = utils.op_build(minimum_auto, shapes, dtypes, attrs={"target": "cuda"})

    lhs, rhs, out_buffer, expect = gen_data(shape1, shape2, dtype)
    output = utils.mod_launch(mod, (lhs, rhs, out_buffer), expect=expect)
    passed = compare_tensor(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if passed else "Fail"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    lhs, rhs, expect = to_tvm_nd_array([lhs, rhs, expect])
    gpu_profiling(mod, lhs, rhs, expect, 400)
Пример #18
0
def test_ms_one_hot(shape, depth, dtype, on_value, off_value, axis, poly_sch=False):
    """Build, launch, verify and profile the one_hot kernel.

    The index tensor returned by ``gen_data`` is cast to ``dtype`` before it
    is handed to the kernel.

    Raises:
        AssertionError: if the kernel output does not match the reference.
    """
    op_attrs = [on_value, off_value, depth, axis, dtype]
    if not poly_sch:
        mod = utils.op_build(one_hot_manual, [shape], [dtype], op_attrs=op_attrs)
    else:
        mod = utils.op_build(one_hot_auto, [shape], [dtype], op_attrs=op_attrs, attrs={"target": "cuda"})

    # The on/off value tensors returned by gen_data are not needed here.
    expect, raw_indices, _on_tensor, _off_tensor, out_buffer = gen_data(axis, depth, dtype, shape, on_value, off_value)
    data = raw_indices.astype(dtype)
    output = utils.mod_launch(mod, (data, out_buffer), expect=expect)
    ret = compare_tensor(output, expect, rtol=5e-03, atol=1.e-8, equal_nan=True)
    print("Test {}".format("Pass" if ret else "Failed"))
    if not ret:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    data, expect = to_tvm_nd_array([data, expect])
    gpu_profiling(mod, data, expect, 400)
Пример #19
0
def test_fused_is_finite(shape, layout='NHWC', poly_sch=False):
    """Build, launch, verify and profile the fused is_finite kernel on float32 data.

    Raises:
        AssertionError: if the kernel output does not match the reference
            from ``gen_data``.
    """
    op_attrs = [layout]
    if not poly_sch:
        mod = utils.op_build(fused_is_finite_manual, [shape], ['float32'], op_attrs=op_attrs)
    else:
        mod = utils.op_build(fused_is_finite_auto, [shape], ['float32'], op_attrs=op_attrs, attrs={"target": "cuda"})

    data, expect, out_buffer = gen_data(shape, 'float32', layout)
    output = utils.mod_launch(mod, (data, out_buffer), expect=expect)
    passed = np.allclose(output, expect, rtol=5e-03, atol=1e-8)
    print("Test {}".format("Pass" if passed else "Fail"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    data, expect = to_tvm_nd_array([data, expect])
    gpu_profiling(mod, data, expect, 400)
Пример #20
0
def test_ms_trans_data(shape, axes, dtype, poly_sch=False):
    """Build, launch, verify and profile the trans_data kernel.

    The ``_auto`` kernel (built with ``{"target": "cuda"}``) is used when
    ``poly_sch`` is true, the ``_manual`` kernel otherwise.

    Raises:
        AssertionError: if the kernel output does not match the reference
            from ``gen_data``.
    """
    if poly_sch:
        mod = utils.op_build(trans_data_auto, [shape], [dtype],
                             op_attrs=[axes],
                             attrs={"target": "cuda"})
    else:
        mod = utils.op_build(trans_data_manual, [shape], [dtype],
                             op_attrs=[axes])
    data, output, expect = gen_data(shape, axes, dtype)
    output = utils.mod_launch(mod, (data, output), expect=expect)
    ret = compare_tensor(output,
                         expect,
                         rtol=5e-03,
                         atol=1.e-8,
                         equal_nan=True)
    print("Test {}".format("Pass" if ret else "Failed"))
    # A mismatch used to be printed only; fail the test like the sibling
    # tests do so that a wrong kernel cannot pass silently.
    if not ret:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")
    data, expect = to_tvm_nd_array([data, expect])
    gpu_profiling(mod, data, expect, 400)
Пример #21
0
def test_ms_log(in_shape, in_dtype, poly_sch=False):
    """Build, launch, verify and profile the log kernel.

    Raises:
        AssertionError: if the kernel output does not match the reference
            from ``gen_data``.
    """
    shapes = (in_shape, )
    dtypes = (in_dtype, )
    if not poly_sch:
        mod = utils.op_build(log_manual, shapes, dtypes)
    else:
        mod = utils.op_build(log_auto, shapes, dtypes,
                             attrs={"target": "cuda"})

    data, out_buffer, expect = gen_data(in_shape, in_dtype)
    output = utils.mod_launch(mod, (data, out_buffer), expect=expect)
    # The absolute tolerance here is 1e-7 (it was changed from 1e-8).
    passed = np.allclose(output, expect, rtol=5e-03, atol=1.e-7)
    print("Test {}".format("Pass" if passed else "Fail"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    data, expect = to_tvm_nd_array([data, expect])
    gpu_profiling(mod, data, expect, 400)
Пример #22
0
def test_ms_equal(shapes, dtype, poly_sch=False):
    """Build, launch, verify and profile the element-wise equal kernel.

    The kernel is always run on the generated input pair. When both input
    shapes are identical it is run a second time with the first input
    compared against itself, using ``np.equal`` as the reference.

    Args:
        shapes: the two input shapes.
        dtype: dtype of both inputs.
        poly_sch: build the ``_auto`` kernel with ``{"target": "cuda"}``
            instead of the ``_manual`` kernel.

    Raises:
        AssertionError: if any launched output does not match its reference.
    """
    if poly_sch:
        mod = utils.op_build(equal_auto,
                             shapes, [dtype, dtype],
                             attrs={"target": "cuda"})
    else:
        mod = utils.op_build(equal_manual, shapes, [dtype, dtype])
    inputs1, output1, expect1 = gen_data(shapes, dtype)
    output1 = utils.mod_launch(mod, (*inputs1, output1), expect=expect1)
    res = np.allclose(output1, expect1, rtol=5e-03, atol=1.e-8)

    same_shape = shapes[0] == shapes[1]
    if same_shape:
        inputs2 = [inputs1[0], inputs1[0]]
        expect2 = np.equal(inputs2[0], inputs2[1])
        output2 = np.full(expect2.shape, 0, bool)
        # The second launch must be checked against its own reference; it
        # was previously given expect1.
        output2 = utils.mod_launch(mod, (*inputs2, output2), expect=expect2)
        res = res and np.allclose(output2, expect2, rtol=5e-03, atol=1.e-8)

    print("Test {}".format("Pass" if res else "Fail"))
    if not res:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    inputs1 = to_tvm_nd_array(inputs1)
    expect1 = to_tvm_nd_array(expect1)
    if same_shape:
        inputs2 = to_tvm_nd_array(inputs2)
        expect2 = to_tvm_nd_array(expect2)
        # NOTE(review): both argument sets are passed in a single profiling
        # call, as in the original; confirm gpu_profiling expects this.
        gpu_profiling(mod, *inputs1, expect1, *inputs2, expect2, 400)
    else:
        gpu_profiling(mod, *inputs1, expect1, 400)
Пример #23
0
def test_ms_divide(shape, dtype, poly_sch=False):
    """Build, launch, verify and profile the element-wise divide kernel.

    Both operands share ``shape`` and ``dtype``. The ``_auto`` kernel (built
    with ``{"target": "cuda"}``) is used when ``poly_sch`` is true, the
    ``_manual`` kernel otherwise.

    Raises:
        AssertionError: if the kernel output does not match the reference
            from ``gen_data``.
    """
    if poly_sch:
        mod = utils.op_build(divide_auto, [shape, shape], [dtype, dtype],
                             attrs={"target": "cuda"})
    else:
        mod = utils.op_build(divide_manual, [shape, shape], [dtype, dtype])
    lhs, rhs, output, expect = gen_data(shape, dtype)
    output = utils.mod_launch(mod, (lhs, rhs, output), expect=expect)
    ret = compare_tensor(output,
                         expect,
                         rtol=5e-03,
                         atol=1.e-8,
                         equal_nan=True)
    print("Test {}".format("Pass" if ret else "Failed"))
    if not ret:
        print("Error cuda:==========================")
        # Was misspelled `get_soure`, which raised AttributeError and hid the
        # intended AssertionError below.
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")
    lhs, rhs, expect = to_tvm_nd_array([lhs, rhs, expect])
    gpu_profiling(mod, lhs, rhs, expect, 400)
Пример #24
0
def test_ms_addn(shape, dtype, n, poly_sch=False):
    """Build, launch, verify and profile the addn kernel over ``n`` same-shaped inputs.

    Raises:
        AssertionError: if the kernel output does not match the reference
            from ``gen_data``.
    """
    shapes = [shape for _ in range(n)]
    if not poly_sch:
        mod = utils.op_build(addn_manual, [shapes], [dtype])
    else:
        mod = utils.op_build(addn_auto, [shapes], [dtype],
                             attrs={"target": "cuda"})

    expect, inputs, out_buffer = gen_data(shape, shapes, dtype, n)
    output = utils.mod_launch(mod, (*inputs, out_buffer), expect=expect)
    passed = compare_tensor(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if passed else "Fail"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    inputs = to_tvm_nd_array(inputs)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *inputs, expect, 400)
Пример #25
0
def test_ms_reduce_max(in_shape,
                       in_dtype,
                       axis=None,
                       keepdims=False,
                       poly_sch=False):
    """Build, launch, verify and profile the reduce_max kernel.

    Raises:
        AssertionError: if the kernel output does not match the reference
            from ``gen_data``.
    """
    shapes = (in_shape, )
    dtypes = (in_dtype, )
    op_attrs = [axis, keepdims]
    if not poly_sch:
        mod = utils.op_build(reduce_max_manual, shapes, dtypes,
                             op_attrs=op_attrs)
    else:
        mod = utils.op_build(reduce_max_auto, shapes, dtypes,
                             op_attrs=op_attrs,
                             attrs={"target": "cuda"})

    data, out_buffer, expect = gen_data(in_shape, in_dtype, axis, keepdims)
    output = utils.mod_launch(mod, (data, out_buffer), expect=expect)
    passed = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if passed else "Fail"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    data, expect = to_tvm_nd_array([data, expect])
    gpu_profiling(mod, data, expect, 400)
Пример #26
0
def test_fused_bn_update(shape, dtype, c1=(1 / (256 * 7 * 7)), c2=1.001e-05, c3=1.00007975, c4=0.100000024, poly_sch=False):
    """Build, launch, verify and profile the fused bn_update kernel.

    The kernel is built for four inputs that all use the shape of the first
    generated tensor, and it is launched with three output slots pre-filled
    with NaN.

    Raises:
        AssertionError: if the kernel outputs do not match the reference
            from ``compute_expect``.
    """
    operands = gen_data(shape, dtype)
    expect = compute_expect(operands, c1, c2, c3, c4)

    op_attrs = [dtype, c1, c2, c3, c4]
    shapes = [operands[0].shape] * 4
    dtypes = [dtype] * 4
    if not poly_sch:
        mod = utils.op_build(fused_bn_update_manual, shapes, dtypes, op_attrs=op_attrs)
    else:
        mod = utils.op_build(fused_bn_update_auto, shapes, dtypes, op_attrs=op_attrs, attrs={"target": "cuda"})

    # All three output slots reference the same NaN-filled buffer object.
    outputs = [np.full(shape, np.nan, dtype)] * 3
    launch_args = operands + outputs
    output = utils.mod_launch(mod, launch_args, outputs=(range(-len(outputs), 0)), expect=expect)
    passed = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if passed else "Failed"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    data = to_tvm_nd_array(operands)
    expect = to_tvm_nd_array(expect)
    gpu_profiling(mod, *data, *expect, 400)
Пример #27
0
def test_ms_bmm(shape1, shape2, dtype, shape_bias=None, poly_sch=False):
    """Build, launch, verify and profile the batch_matmul kernel with an optional bias.

    The bias tensor is part of the launch arguments only when ``shape_bias``
    is given.

    Raises:
        AssertionError: if the kernel output does not match the reference
            from ``gen_data``.
    """
    has_bias = shape_bias is not None
    shapes = (shape1, shape2, shape_bias)
    dtypes = (dtype, dtype, dtype) if has_bias else (dtype, dtype)
    if not poly_sch:
        mod = utils.op_build(batch_matmul_manual, shapes, dtypes)
    else:
        mod = utils.op_build(batch_matmul_auto, shapes, dtypes,
                             attrs={"target": "cuda"})

    lhs, rhs, bias, out_buffer, expect = gen_data(shape1, shape2, dtype,
                                                  shape_bias)
    if has_bias:
        args = (lhs, rhs, bias, out_buffer)
    else:
        args = (lhs, rhs, out_buffer)
    output = utils.mod_launch(mod, args, expect=expect)
    passed = np.allclose(output, expect, rtol=5e-03, atol=1.e-8)
    print("Test {}".format("Pass" if passed else "Fail"))
    if not passed:
        print("Error cuda:========================")
        print(mod.imported_modules[0].get_source())
        raise AssertionError("Test fail")

    # NOTE(review): the bias tensor is not forwarded to profiling even when
    # shape_bias is given - confirm this is intended.
    lhs, rhs, expect = to_tvm_nd_array([lhs, rhs, expect])
    gpu_profiling(mod, lhs, rhs, expect, 400)
Пример #28
0
def gen_kernel_conv_bn1(op_desc: ConvDesc,
                        input_shape,
                        index_table,
                        config: ConvConfig = None,
                        idx=None,
                        gen_tiling_spaces=False):
    """Compile kernel module for conv_bn1.

    Args:
        op_desc: descriptor providing ``fmap_shape``, ``filter_shape``,
            ``pad``, ``stride``, ``dilation`` and ``use_bias``.
        input_shape: indexable sequence; its first three entries are used
            when ``op_desc.use_bias`` is true, otherwise its first two.
        index_table: must be None.
        config: optional tiling configuration; when None the build attrs
            are ``{'dim': ""}``.
        idx: optional suffix appended (via ``str``) to the kernel name.
        gen_tiling_spaces: forwarded to ``utils.op_build`` as ``tuning``.

    Returns:
        Whatever ``utils.op_build`` returns for the conv_bn1 operator.

    Raises:
        RuntimeError: if ``index_table`` is not None.
    """
    if index_table is not None:
        raise RuntimeError('index_table should be none')

    name_parts = ["conv_bn1_poly"]
    if idx is not None:
        name_parts.append(str(idx))
    kernel_name = "".join(name_parts)

    if config is not None:
        attrs = {
            'conv_tile': [
                config.tile_h, config.tile_co, config.tile_m,
                config.tile_k, config.tile_n, config.tile_w,
            ],
            'bypass': config.bypass,
        }
    else:
        attrs = {'dim': ""}

    # Explicit indexing (rather than slicing) so that a too-short
    # input_shape still raises IndexError.
    entry_count = 3 if op_desc.use_bias else 2
    shape = [input_shape[pos] for pos in range(entry_count)]
    conv_dtype = 'float16'

    op_attrs = [
        op_desc.fmap_shape, op_desc.filter_shape,
        op_desc.pad, op_desc.stride, op_desc.dilation,
        op_desc.use_bias, attrs,
    ]
    return utils.op_build(conv_bn1.conv_bn1, [shape], [conv_dtype],
                          op_attrs=op_attrs,
                          kernel_name=kernel_name,
                          attrs=attrs,
                          polyhedral=True,
                          tuning=gen_tiling_spaces)
Пример #29
0
def trace_extract_run(shape, dtype, attrs):
    """
    Build, launch and check the trace_extract operator.

    Args:
        shape: input shape passed to the build and to ``gen_data``.
        dtype: input dtype passed to the build and to ``gen_data``.
        attrs: build attributes forwarded to ``utils.op_build``.

    Returns:
        Tuple ``(inputs, acu_output, exp_output, TestCase_Result)`` where the
        last item is the ``np.allclose`` verdict (``rtol=5e-03``, NaNs
        compared as equal).
    """
    mod = utils.op_build(
        SecondOrder_trace_extract.trace_extract,
        [shape],
        [dtype],
        kernel_name='trace',
        attrs=attrs,
    )
    exp_output, inputs, output = gen_data(dtype, shape)

    # Launch the built module.
    launch_args = (inputs, output)
    acu_output = utils.mod_launch(mod, launch_args, expect=exp_output)

    # Print the actual result followed by the expected one.
    for banner, value in (('----result----', acu_output),
                          ('----compare---', exp_output)):
        print(banner)
        print(value)

    matched = np.allclose(acu_output, exp_output, rtol=5e-03, equal_nan=True)
    return inputs, acu_output, exp_output, matched
Пример #30
0
def diag_split_matrix_run(shape, dtype, attrs):
    """
    Build, launch and check the diag_split_matrix variant chosen by ``shape[0]``.

    Three variants are dispatched on ``dim = shape[0]``:

    * ``dim // split_dim > 32``: ``diag_split_matrix_4608`` with two outputs,
      concatenated along axis 0 before comparison.
    * ``dim == 576``: ``diag_split_matrix_576`` with two outputs, each compared
      against its own expected array.
    * otherwise: ``diag_split_matrix_small`` with a single output.

    ``split_dim`` is resolved from outside this function.

    Args:
        shape: input shape; its first entry selects the variant.
        dtype: input dtype passed to the build and the data generator.
        attrs: build attributes forwarded to the build call.

    Returns:
        Tuple ``(inputs, actual, expected, TestCase_Result)``. In the
        ``dim == 576`` branch ``actual``/``expected`` are the first output
        pair only, while ``TestCase_Result`` covers both outputs.
    """
    dim = shape[0]
    if (dim // split_dim) > 32:
        mod = utils.op_build_test(SecondOrder_diag_split_matrix.diag_split_matrix_4608, [shape], [dtype], kernel_name='trace', attrs=attrs)
        exp_output, inputs, out1, out2 = gen_data1(dtype, shape)
        acu_output1, acu_output2 = utils.mod_launch(mod, (inputs, out1, out2), (-2, -1), expect=exp_output)
        print("=====",dim," compare====")
        print(acu_output1.shape)
        print(acu_output2.shape)
        print("=====",dim," compare====")
        # The expected data is a single array, so join both halves first.
        acu_output = np.concatenate((acu_output1, acu_output2), axis=0)
        TestCase_Result = np.allclose(acu_output, exp_output, rtol=5e-03, equal_nan=True)
        return inputs, acu_output, exp_output, TestCase_Result
    elif dim == 576:
        mod = utils.op_build_test(SecondOrder_diag_split_matrix.diag_split_matrix_576, [shape], [dtype], kernel_name='trace', attrs=attrs)
        exp_output1, exp_output2, inputs, out1, out2 = gen_data3(dtype, shape)
        acu_output1, acu_output2 = utils.mod_launch(mod, (inputs, out1, out2), (-2, -1), expect=exp_output1)
        print("=====",dim," compare====")
        print(acu_output1.shape)
        print(acu_output2.shape)
        print("=====",dim," compare====")
        # Both outputs must match; previously the verdict for the first
        # output was overwritten by the second and therefore ignored.
        first_ok = np.allclose(acu_output1, exp_output1, rtol=5e-03, equal_nan=True)
        second_ok = np.allclose(acu_output2, exp_output2, rtol=5e-03, equal_nan=True)
        TestCase_Result = bool(first_ok and second_ok)
        return inputs, acu_output1, exp_output1, TestCase_Result
    else:
        mod = utils.op_build(SecondOrder_diag_split_matrix.diag_split_matrix_small, [shape], [dtype], kernel_name='trace01', attrs=attrs)
        exp_output, inputs, out1 = gen_data2(dtype, shape)
        acu_output = utils.mod_launch(mod, (inputs, out1), expect=exp_output)
        print("=====",dim," compare====")
        print(acu_output.shape)
        print("=====",dim," compare====")
        TestCase_Result = np.allclose(acu_output, exp_output, rtol=5e-03, equal_nan=True)
        return inputs, acu_output, exp_output, TestCase_Result