def verify_conv2d_transpose_nchw(batch, in_channel, in_size, num_filter,
                                 kernel, stride, padding, output_padding):
    in_height, in_width = in_size
    kernel_height, kernel_width = kernel
    stride_height, stride_width = stride
    pad_top, pad_left, pad_bottom, pad_right = padding

    A = te.placeholder((batch, in_channel, in_height, in_width), name='A')
    W = te.placeholder((in_channel, num_filter, kernel_height, kernel_width),
                       name='W')

    a_shape = get_const_tuple(A.shape)
    w_shape = get_const_tuple(W.shape)
    dtype = A.dtype

    @memoize(
        "topi.tests.test_topi_conv2d_transpose.verify_conv2d_transpose_nchw")
    def get_ref_data():
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        w_np = np.random.uniform(size=w_shape).astype(dtype)
        b_np = tvm.topi.testing.conv2d_transpose_nchw_python(
            a_np, w_np, stride, padding, output_padding)
        c_np = np.maximum(b_np, 0)
        return a_np, w_np, b_np, c_np

    a_np, w_np, b_np, c_np = get_ref_data()

    def check_device(device, ctx):
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            fcompute, fschedule = tvm.topi.testing.dispatch(
                device, _conv2d_transpose_nchw_implement)
            B = fcompute(A, W, [stride_height, stride_width],
                         [pad_top, pad_left, pad_bottom, pad_right], A.dtype,
                         output_padding)
            C = topi.nn.relu(B)
            s1 = fschedule([B])
            s2 = fschedule([C])
        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype),
                         ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype),
                         ctx)

        func1 = tvm.build(s1, [A, W, B], device)
        func2 = tvm.build(s2, [A, W, C], device)
        func1(a, w, b)
        func2(a, w, c)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)

    for device, ctx in tvm.testing.enabled_targets():
        check_device(device, ctx)
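All of the snippets below assume the common TVM test-suite preamble rather than repeating it. A minimal sketch of those imports (module paths as in the TVM 0.7-era layout; get_const_tuple moved from tvm.topi.util to tvm.topi.utils in later releases, and helpers such as generate_quantized_np, get_pad_tuple, convert and the _*_shape_func hybrid-script functions come from the individual test or operator files):

import math
import numpy as np
from scipy import signal

import tvm
import tvm.testing
import tvm.topi.testing
from tvm import te, topi
from tvm.contrib import nvcc
from tvm.contrib.pickle_memoize import memoize
from tvm.topi.util import get_const_tuple  # tvm.topi.utils in newer releases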
Example #2
 def _compute_conv1d(attrs, inputs, out_type):
     """Compute definition of conv1d"""
     strides = get_const_tuple(attrs.strides)
     padding = get_const_tuple(attrs.padding)
     dilation = get_const_tuple(attrs.dilation)
     out_dtype = attrs.out_dtype
     out_dtype = inputs[0].dtype if out_dtype in ("same", "") else out_dtype
     return [
         topi_compute(inputs[0], inputs[1], strides, padding, dilation,
                      out_dtype)
     ]
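The one-space indentation and the free variable topi_compute indicate that this helper (like the other _compute_* and compute_* helpers below, some of which also capture variables such as need_data_layout) is nested inside a factory that closes over a concrete TOPI compute. A minimal sketch of that pattern, with the factory name assumed:

def wrap_compute_conv1d(topi_compute):
    """Return a relay compute callback that closes over a TOPI conv1d compute."""
    def _compute_conv1d(attrs, inputs, out_type):
        # same body as the helper above; topi_compute is the captured argument
        strides = get_const_tuple(attrs.strides)
        padding = get_const_tuple(attrs.padding)
        dilation = get_const_tuple(attrs.dilation)
        out_dtype = attrs.out_dtype
        out_dtype = inputs[0].dtype if out_dtype in ("same", "") else out_dtype
        return [topi_compute(inputs[0], inputs[1], strides, padding, dilation, out_dtype)]
    return _compute_conv1d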
Example #3
 def compute_conv3d_transpose(attrs, inputs, out_dtype):
     """Compute definition of conv3d_transpose"""
     padding = get_const_tuple(attrs.padding)
     strides = get_const_tuple(attrs.strides)
     output_padding = get_const_tuple(attrs.output_padding)
     out_dtype = attrs.out_dtype
     out_dtype = (inputs[0].dtype if out_dtype in ("same",
                                                   "") else out_dtype)
     out = topi_compute(inputs[0], inputs[1], strides, padding, out_dtype,
                        output_padding)
     return [out]
Example #4
 def get_ref_data():
     a_np = generate_quantized_np(get_const_tuple(A.shape), activation_bits, input_type)
     w_np = generate_quantized_np(get_const_tuple(W.shape), weight_bits, input_type)
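     # unipolar mode reinterprets the quantized {0, 1} weights as {-1, +1} before computing the reference output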
     if unipolar:
         w_ = np.copy(w_np).astype(out_dtype)
         for x in np.nditer(w_, op_flags=['readwrite']):
             x[...] = 1 if x == 1 else -1
         b_np = tvm.topi.testing.conv2d_nhwc_python(a_np, w_, stride, padding).astype(out_dtype)
     else:
         b_np = tvm.topi.testing.conv2d_nhwc_python(a_np, w_np, stride, padding).astype(out_dtype)
     return a_np, w_np, b_np
Example #5
 def _compute_dilation2d(attrs, inputs, out_type):
     padding = get_const_tuple(attrs.padding)
     strides = get_const_tuple(attrs.strides)
     dilations = get_const_tuple(attrs.dilations)
     data_layout = attrs.get_str("data_layout")
     out_dtype = attrs.out_dtype
     out_dtype = inputs[0].dtype if out_dtype in ("same", "") else out_dtype
     args = [inputs[0], inputs[1], strides, padding, dilations]
     if need_data_layout:
         args.append(data_layout)
     args.append(out_dtype)
     return [topi_compute(*args)]
Example #6
 def _compute_deformable_conv2d(attrs, inputs, out_dtype):
     assert attrs.data_layout == "NCHW"
     padding = get_const_tuple(attrs.padding)
     strides = get_const_tuple(attrs.strides)
     dilation = get_const_tuple(attrs.dilation)
     deformable_groups = attrs.deformable_groups
     groups = attrs.groups
     out_dtype = attrs.out_dtype
     out_dtype = inputs[0].dtype if out_dtype in ("same", "") else out_dtype
     out = topi_compute(inputs[0], inputs[1], inputs[2], strides, padding,
                        dilation, deformable_groups, groups, out_dtype)
     return [out]
Example #7
def verify_pool_grad(n, ic, ih, kh, sh, padding, pool_type, ceil_mode, count_include_pad=True,
                     add_relu=False):
    """verify function of pool_grad"""
    iw = ih
    kw = kh
    sw = sh
    pt, pl, pb, pr = padding
    A = te.placeholder((n, ic, ih, iw), name='A')
    B = topi.nn.pool(A, kernel=[kh, kw], stride=[sh, sw], padding=padding,
                     pool_type=pool_type, ceil_mode=ceil_mode,
                     layout="NCHW", count_include_pad=count_include_pad)
    dtype = A.dtype

    bshape = get_const_tuple(B.shape)
    ashape = get_const_tuple(A.shape)
    if ceil_mode:
        assert bshape[2] == int(math.ceil(float(ashape[2] - kh + pt + pb) / sh) + 1)
        assert bshape[3] == int(math.ceil(float(ashape[3] - kw + pl + pr) / sw) + 1)
    else:
        assert bshape[2] == int(math.floor(float(ashape[2] - kh + pt + pb) / sh) + 1)
        assert bshape[3] == int(math.floor(float(ashape[3] - kw + pl + pr) / sw) + 1)
    OutGrad = te.placeholder(bshape, name='OutGrad')
    PoolGrad = topi.nn.pool_grad(OutGrad, A, kernel=[kh, kw], stride=[sh, sw], padding=padding,
                                 pool_type=pool_type, ceil_mode=ceil_mode,
                                 layout="NCHW", count_include_pad=count_include_pad)
    if add_relu:
        PoolGrad = topi.nn.relu(PoolGrad)

    a_np = np.random.uniform(low=0.001, size=(n, ic, ih, iw)).astype(dtype)
    out_grad_np = np.random.uniform(low=0.001, size=bshape).astype(dtype)
    pool_grad_np = tvm.topi.testing.pool_grad_nchw(a_np, out_grad_np, pool_size=(kh, kw),
                                               strides=(sh, sw), padding=padding,
                                               pool_type=pool_type, ceil_mode=ceil_mode,
                                               count_include_pad=count_include_pad)
    if add_relu:
        pool_grad_np = np.maximum(pool_grad_np, 0.)

    def check_device(device, ctx):
        print("Running on target: %s" % device)
        with tvm.target.Target(device):
            s_func = tvm.topi.testing.dispatch(device, _pool_grad_schedule)
            s = s_func(PoolGrad)

        a = tvm.nd.array(a_np, ctx)
        out_grad = tvm.nd.array(out_grad_np, ctx)
        pool_grad = tvm.nd.array(np.zeros(get_const_tuple(PoolGrad.shape), dtype=dtype), ctx)
        f = tvm.build(s, [A, OutGrad, PoolGrad], device)
        f(a, out_grad, pool_grad)
        tvm.testing.assert_allclose(pool_grad.asnumpy(), pool_grad_np, rtol=1e-5)

    for device, ctx in tvm.testing.enabled_targets():
        check_device(device, ctx)
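A driver would call this verifier with concrete workloads; the parameters below are illustrative rather than taken from the original test file:

verify_pool_grad(1, 256, 32, 3, 2, [1, 1, 1, 1], 'avg', False)
verify_pool_grad(1, 256, 31, 4, 4, [3, 3, 3, 3], 'max', False, add_relu=True)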
Example #8
def verify_deformable_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation=1, deformable_groups=1, groups=1):
    print("Workload: (%d, %d, %d, %d, %d, %d, %d, %d, %d, %d)" % (batch, in_channel, in_size,
            num_filter, kernel, stride, padding, dilation, deformable_groups, groups))

    A = te.placeholder((batch, in_channel, in_size, in_size), name='A')
    out_size = (in_size - (kernel - 1) * dilation - 1 + 2 * padding) // stride + 1
    Offset = te.placeholder((batch, deformable_groups * kernel * kernel * 2, out_size, out_size), name='offset')
    W = te.placeholder((num_filter, in_channel, kernel, kernel), name='W')
    bias = te.placeholder((num_filter, 1, 1), name='bias')

    a_shape = get_const_tuple(A.shape)
    offset_shape = get_const_tuple(Offset.shape)
    w_shape = get_const_tuple(W.shape)
    bias_shape = get_const_tuple(bias.shape)
    dtype = A.dtype

    @memoize("topi.tests.test_topi_deformable_conv2d_nchw.verify_deformable_conv2d_nchw")
    def get_ref_data():
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        offset_np = np.random.randn(*offset_shape).astype(dtype)
        w_np = np.random.uniform(size=w_shape).astype(dtype)
        b_np = np.random.uniform(size=bias_shape).astype(dtype)
        c_np = tvm.topi.testing.deformable_conv2d_nchw_python(a_np, offset_np, w_np, stride, padding,
                                                          dilation, deformable_groups, groups)

        return a_np, offset_np, w_np, c_np

    a_np, offset_np, w_np, c_np = get_ref_data()

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not tvm.testing.device_enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        fcompute, fschedule = tvm.topi.testing.dispatch(device, _deformable_conv2d_implement)
        with tvm.target.create(device):
            C = fcompute(A, Offset, W, stride, padding, dilation,
                         deformable_groups, groups, dtype)
            s = fschedule([C])

            a = tvm.nd.array(a_np, ctx)
            offset = tvm.nd.array(offset_np, ctx)
            w = tvm.nd.array(w_np, ctx)
            c = tvm.nd.empty(c_np.shape, dtype=c_np.dtype, ctx=ctx)

            func = tvm.build(s, [A, Offset, W, C], device)
            func(a, offset, w, c)
            tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)

    for device in ['llvm', 'cuda']:
        check_device(device)
Example #9
def conv2d_NCHWc_shape_func(attrs, inputs, _):
    """
    Shape function for contrib_conv2d_NCHWc op.
    """
    strides = get_const_tuple(attrs.strides)
    padding = get_const_tuple(attrs.padding)
    dilation = get_const_tuple(attrs.dilation)
    out_layout = attrs.out_layout
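    # out_layout is a blocked layout string such as "NCHW16c"; dropping the "NCHW" prefix and the trailing "c" leaves the block size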
    oc_bn = int(out_layout[4:-1])

    return [_conv2d_NCHWc_shape_func(inputs[0], inputs[1],
                                     convert(strides), convert(padding),
                                     convert(dilation), convert(oc_bn))]
Example #10
def verify_leaky_relu(m, alpha):
    A = te.placeholder((m, ), name='A')
    B = topi.nn.leaky_relu(A, alpha)
    s = te.create_schedule([B.op])

    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
    b_np = a_np * (a_np > 0) + a_np * (a_np < 0) * alpha
    ctx = tvm.cpu(0)
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
    foo = tvm.build(s, [A, B], "llvm", name="leaky_relu")
    foo(a, b)
    tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
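An illustrative invocation (shape and slope chosen arbitrarily):

verify_leaky_relu(1024, 0.1)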
Example #11
 def get_ref_data(a_shape, b_shape, input_dtype):
     a_np = generate_quantized_np(get_const_tuple(a_shape), activation_bits,
                                  input_dtype)
     b_np = generate_quantized_np(get_const_tuple(b_shape), weight_bits,
                                  input_dtype)
     if unipolar:
         b_ = np.copy(b_np).astype(out_dtype)
         for x in np.nditer(b_, op_flags=['readwrite']):
             x[...] = 1 if x == 1 else -1
         c_np = np.dot(a_np, b_.T)
     else:
         c_np = np.dot(a_np, b_np.T)
     return a_np, b_np, c_np
Example #12
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        # build the kernel
        f = tvm.build(schedule, [Input, Out_grad, Weight_grad], device)
        # prepare pod type for test data closure
        dtype = Out_grad.dtype
        out_grad_shape = get_const_tuple(Out_grad.shape)
        in_shape = get_const_tuple(Input.shape)

        # use memoize to pickle the test data so later runs can reuse it
        @memoize("topi.tests.test_topi_depthwise_conv2d_backward_weight.nhwc")
        def get_ref_data():
            out_grad_np = np.random.uniform(size=out_grad_shape).astype(dtype)
            input_np = np.random.uniform(size=in_shape).astype(dtype)
            dilated_out_grad_np = tvm.topi.testing.dilate_python(
                out_grad_np, [1, stride_h, stride_w, 1])

            pad_top, pad_left, pad_bottom, pad_right = get_pad_tuple(
                [padding_h, padding_w], (filter_h, filter_w))
            padded_input_np = np.zeros(
                (batch, in_h + pad_top + pad_bottom,
                 in_w + pad_left + pad_right, in_channel))
            padded_input_np[:, pad_top:in_h + pad_top,
                            pad_left:in_w + pad_left, :] = input_np

            weight_grad_np = np.zeros(
                (filter_h, filter_w, in_channel, channel_multiplier))
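            # convolve2d with the gradient rotated by 180 degrees is a 'valid' cross-correlation,
            # which accumulates the weight gradient of the depthwise convolution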
            for c in range(in_channel):
                for m in range(channel_multiplier):
                    for b in range(batch):
                        weight_grad_np[:, :, c, m] += signal.convolve2d(padded_input_np[b, :, :, c], \
                            np.rot90(dilated_out_grad_np[b, :, :, c*channel_multiplier+m%channel_multiplier], 2), \
                            mode='valid')[0:filter_h, 0:filter_w]
            return (out_grad_np, input_np, weight_grad_np)

        (out_grad_np, input_np, weight_grad_np) = get_ref_data()

        out_grad_tvm = tvm.nd.array(out_grad_np, ctx)
        input_tvm = tvm.nd.array(input_np, ctx)
        weight_grad_tvm = tvm.nd.array(np.zeros(shape=fshape, dtype=dtype),
                                       ctx)
        # launch the kernel
        timer = f.time_evaluator(f.entry_name, ctx, number=1)
        tcost = timer(input_tvm, out_grad_tvm, weight_grad_tvm).mean
        tvm.testing.assert_allclose(weight_grad_np,
                                    weight_grad_tvm.asnumpy(),
                                    rtol=1e-4)
Example #13
 def compute_bitserial_conv2d(attrs, inputs, out_dtype):
     """Compute definition for bitserial conv2d."""
     padding = get_const_tuple(attrs.padding)
     strides = get_const_tuple(attrs.strides)
     activation_bits = attrs.activation_bits
     weight_bits = attrs.weight_bits
     pack_dtype = attrs.pack_dtype
     out_dtype = attrs.out_dtype
     unipolar = attrs.unipolar
     return [
         topi_compute(inputs[0], inputs[1], strides, padding,
                      activation_bits, weight_bits, pack_dtype, out_dtype,
                      unipolar)
     ]
Example #14
def verify_conv1d_transpose_ncw(batch, in_channel, in_size, num_filter, kernel,
                                stride, padding, output_padding):
    in_width = in_size
    A = te.placeholder((batch, in_channel, in_width), name='A')
    W = te.placeholder((in_channel, num_filter, kernel), name='W')

    a_shape = get_const_tuple(A.shape)
    w_shape = get_const_tuple(W.shape)
    dtype = A.dtype

    @memoize(
        "topi.tests.test_topi_conv1d_transpose.verify_conv1d_transpose_ncw")
    def get_ref_data():
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        w_np = np.random.uniform(size=w_shape).astype(dtype)
        b_np = tvm.topi.testing.conv1d_transpose_ncw_python(
            a_np, w_np, stride, padding, output_padding)
        c_np = np.maximum(b_np, 0)
        return a_np, w_np, b_np, c_np

    a_np, w_np, b_np, c_np = get_ref_data()

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        with tvm.target.create(device):
            fcompute, fschedule = tvm.topi.testing.dispatch(
                device, _conv1d_transpose_ncw_implement)
            B = fcompute(A, W, stride, padding, A.dtype, output_padding)
            C = topi.nn.relu(B)
            s1 = fschedule([B])
            s2 = fschedule([C])
        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype),
                         ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype),
                         ctx)

        func1 = tvm.build(s1, [A, W, B], device)
        func2 = tvm.build(s2, [A, W, C], device)
        func1(a, w, b)
        func2(a, w, c)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)
Example #15
def test_conv2d_hwcn_map():
    batch = 64
    in_channel = 128
    in_height = 16
    in_width = 16
    num_filter = 128
    kernel = 3
    stride = 2
    padding = 'SAME'

    A = te.placeholder((in_height, in_width, in_channel, batch), name='A')
    W = te.placeholder((kernel, kernel, in_channel, num_filter), name='W')
    B = topi.nn.conv2d_hwcn(A, W, stride, padding)
    C = topi.nn.relu(B)
    s1 = topi.cuda.schedule_conv2d_hwcn([B])
    s2 = topi.cuda.schedule_conv2d_hwcn([C])

    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
    w_np = np.random.uniform(size=get_const_tuple(W.shape)).astype(W.dtype)
    b_np = tvm.topi.testing.conv2d_hwcn_python(a_np, w_np, stride, padding)
    c_np = np.maximum(b_np, 0)

    def check_device(device):
        if not tvm.runtime.enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        ctx = tvm.context(device, 0)
        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype),
                         ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype),
                         ctx)

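        # configure the tir.UnrollLoop pass: allow up to 128 automatically unrolled steps
        # and request explicit unrolling only on rocm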
        with tvm.transform.PassContext(
                config={
                    "tir.UrollLoop": {
                        "auto_unroll_max_step": 128,
                        "explicit_unroll": device == "rocm"
                    }
                }):
            func1 = tvm.build(s1, [A, W, B], device)
            func1(a, w, b)
            tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
            func2 = tvm.build(s2, [A, W, C], device)
            func2(a, w, c)
            tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)

    for device in ['cuda', 'opencl', 'rocm']:
        check_device(device)
Example #16
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not tvm.testing.device_enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)

        conv2d_nchw, schedule_conv2d_nchw = tvm.topi.testing.get_conv2d_nchw_implement(device)

        k = 10.0
        dilation = (1, 1)
        with tvm.target.create(device):
            A = te.placeholder((batch, in_channel, in_size, in_size), name='A')
            W = te.placeholder((num_filter, in_channel, kernel, kernel), name='W')
            B = conv2d_nchw(A, W, stride, padding, dilation, A.dtype)
            if typ == "add":
                C = B + k
            elif typ == "sub":
                C = B - k
            elif typ == "mul":
                C = B * k
            elif typ == "div":
                C = B / k
            else:
                raise NotImplementedError()
            s = schedule_conv2d_nchw([C])

        foo = tvm.build(s, [A, W, B, C], device, name="conv2d_scalar_" + typ)

        a_npy = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
        w_npy = np.random.uniform(size=get_const_tuple(W.shape)).astype(W.dtype)
        b_npy = tvm.topi.testing.conv2d_nchw_python(a_npy, w_npy, stride, padding)
        c_npy = np.random.uniform(size=get_const_tuple(B.shape)).astype(B.dtype)
        if typ == "add":
            c_npy = b_npy + k
        elif typ == "sub":
            c_npy = b_npy - k
        elif typ == "mul":
            c_npy = b_npy * k
        elif typ == "div":
            c_npy = b_npy / k
        else:
            raise NotImplementedError()

        a_nd = tvm.nd.array(a_npy, ctx)
        w_nd = tvm.nd.array(w_npy, ctx)
        b_nd = tvm.nd.array(np.empty(b_npy.shape).astype(B.dtype), ctx)
        c_nd = tvm.nd.array(np.empty(c_npy.shape).astype(C.dtype), ctx)
        foo(a_nd, w_nd, b_nd, c_nd)
        tvm.testing.assert_allclose(c_nd.asnumpy(), c_npy, rtol=1E-4, atol=1E-4)
Example #17
def verify_conv2d_nhwc(batch,
                       in_channel,
                       in_size,
                       num_filter,
                       kernel,
                       stride,
                       padding,
                       dilation=1):
    in_height = in_width = in_size

    A = te.placeholder((batch, in_height, in_width, in_channel), name="A")
    W = te.placeholder((kernel, kernel, in_channel, num_filter), name="W")

    a_shape = get_const_tuple(A.shape)
    w_shape = get_const_tuple(W.shape)
    dtype = A.dtype

    @memoize("topi.tests.test_topi_conv2d_nhwc.verify_nhwc.v2")
    def get_ref_data():
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        w_np = np.random.uniform(size=w_shape).astype(dtype)
        dw_np = tvm.topi.testing.dilate_python(w_np,
                                               (dilation, dilation, 1, 1))
        b_np = tvm.topi.testing.conv2d_nhwc_python(a_np, dw_np, stride,
                                                   padding)
        return a_np, w_np, b_np

    a_np, w_np, b_np = get_ref_data()

    def check_device(device):
        if not tvm.testing.device_enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.Target(device):
            fcompute, fschedule = tvm.topi.testing.dispatch(
                device, _conv2d_nhwc_implement)
            B = fcompute(A, W, stride, padding, dilation, dtype)
            s = fschedule([B])
        ctx = tvm.context(device, 0)
        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype),
                         ctx)
        func = tvm.build(s, [A, W, B], device)
        func(a, w, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ["llvm", "cuda"]:
        check_device(device)
Example #18
def verify_bitserial_conv2d_nhwc(batch, in_size, in_channel, num_filter,
                                 kernel, stride, padding, activation_bits,
                                 weight_bits, unipolar):
    in_height = in_width = in_size
    input_dtype = 'uint32'
    out_dtype = 'int32'

    with tvm.target.Target('llvm'):
        A = te.placeholder((batch, in_height, in_width, in_channel),
                           dtype=input_dtype,
                           name='A')
        W = te.placeholder((kernel, kernel, in_channel, num_filter),
                           dtype=input_dtype,
                           name='W')
        B = topi.x86.bitserial_conv2d_nhwc(A, W, stride, padding,
                                           activation_bits, weight_bits,
                                           input_dtype, out_dtype, unipolar)
        s = topi.x86.schedule_bitserial_conv2d_nhwc([B])

    a_shape = get_const_tuple(A.shape)
    w_shape = get_const_tuple(W.shape)

    @memoize("topi.tests.test_topi_bitseral_conv2d_nhwc")
    def get_ref_data():
        a_np = generate_quantized_np(get_const_tuple(a_shape), activation_bits,
                                     input_dtype)
        w_np = generate_quantized_np(get_const_tuple(w_shape), weight_bits,
                                     input_dtype)
        if unipolar:
            w_ = np.copy(w_np).astype(out_dtype)
            for x in np.nditer(w_, op_flags=['readwrite']):
                x[...] = 1 if x == 1 else -1
            b_np = tvm.topi.testing.conv2d_nhwc_python(
                a_np, w_, stride, padding).astype(out_dtype)
        else:
            b_np = tvm.topi.testing.conv2d_nhwc_python(
                a_np, w_np, stride, padding).astype(out_dtype)
        return a_np, w_np, b_np

    a_np, w_np, b_np = get_ref_data()

    ctx = tvm.cpu(0)
    a = tvm.nd.array(a_np, ctx)
    w = tvm.nd.array(w_np, ctx)
    b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
    func = tvm.build(s, [A, W, B], 'llvm')

    func(a, w, b)
    tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
Example #19
    def check_device(device, host="llvm"):
        ctx = tvm.context(device, 0)
        if not tvm.runtime.enabled(host):
            return
        if not ctx.exist:
            print("skip because %s is not enabled.." % device)
            return

        sout = te.create_schedule(out.op)
        mout = tvm.build(sout, [out] + inputs)
        out_shape = get_const_tuple(out.shape)

        l, h = data_range
        input_data = [
            tvm.nd.array(
                np.random.uniform(l, h, size=get_const_tuple(
                    input.shape)).astype(input.dtype)) for input in inputs
        ]

        ones = topi.full_like(out, 1.0)
        # we provide head to sum and reduce the output dimension,
        # which is equivalent to grad(out.sum(), inputs)
        grads = te.gradient(out, inputs, head=ones)
        grad_sched = te.create_schedule([grad.op for grad in grads])
        mgrad = tvm.build(grad_sched, list(grads) + inputs)
        # print(tvm.lower(grad_sched, list(grads) + inputs, simple_mode=True))

        grad_data = [
            tvm.nd.empty(get_const_tuple(i.shape), g.dtype)
            for i, g in zip(inputs, grads)
        ]

        mgrad(*grad_data, *input_data)
        g_res = [g.asnumpy() for g in grad_data]

        if desired_grads:
            assert isinstance(desired_grads, list)
            for actual, desired in zip(g_res, desired_grads):
                assert_allclose(actual, desired, rtol=0.1, atol=1e-2)
        else:

            def forward(*in_data):
                out_data = tvm.nd.empty(out_shape, out.dtype)
                mout(out_data, *[tvm.nd.array(d) for d in list(in_data)])
                return out_data.asnumpy().sum()

            check_numerical_grads(forward, [d.asnumpy() for d in input_data],
                                  g_res)
Example #20
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not tvm.testing.device_enabled(device):
            print("Skip because %s is not enabled" % device)
            return

        print("Running on target: %s" % device)
        with tvm.target.Target(device):
            fcompute, fschedule = tvm.topi.testing.dispatch(
                device, _group_conv2d_nchw_implement)
            C = fcompute(A, W, stride, padding, dilation, groups, dtype)
            if add_bias:
                C = topi.add(C, bias)
            if add_relu:
                C = topi.nn.relu(C)
            s = fschedule([C])

        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(b_np, ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype),
                         ctx)
        if add_bias:
            func = tvm.build(s, [A, W, bias, C], device, name="relu_%d_%d_%d_%d_%d_%d_%d_%d_%d" %\
                (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation, groups))
            func(a, w, b, c)
        else:
            func = tvm.build(s, [A, W, C], device, name="relu_%d_%d_%d_%d_%d_%d_%d_%d_%d" % \
            (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation, groups))
            func(a, w, c)
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
Example #21
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not tvm.testing.device_enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        if device == "cuda" and not tvm.contrib.nvcc.have_int8(
                ctx.compute_version):
            print("Skip because int8 intrinsics are not available")
            return

        print("Running on target: %s" % device)
        with tvm.target.Target(device):
            C = topi.cuda.group_conv2d_NCHWc_int8(A, W, stride, padding,
                                                  dilation, groups, dtype)
            if add_bias:
                C = topi.add(C, bias)
            if add_relu:
                C = topi.nn.relu(C)
            s = topi.cuda.schedule_group_conv2d_NCHWc_int8([C])

        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(b_np, ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype),
                         ctx)
        if add_bias:
            func = tvm.build(s, [A, W, bias, C], device, name="relu_%d_%d_%d_%d_%d_%d_%d_%d_%d" %\
                (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation, groups))
            func(a, w, b, c)
        else:
            func = tvm.build(s, [A, W, C], device, name="relu_%d_%d_%d_%d_%d_%d_%d_%d_%d" % \
            (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation, groups))
            func(a, w, c)
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
Example #22
 def _compute_roi_align(attrs, inputs, out_type):
     assert attrs.layout == "NCHW"
     pooled_size = get_const_tuple(attrs.pooled_size)
     return [topi_compute(inputs[0], inputs[1],
                          pooled_size=pooled_size,
                          spatial_scale=attrs.spatial_scale,
                          sample_ratio=attrs.sample_ratio)]
Example #23
def dilation2d_strategy(attrs, inputs, out_type, target):
    """dilation2d_strategy generic strategy"""
    logger.warning("dilation2d_strategy is not optimized for this platform.")
    strategy = _op.OpStrategy()
    dilations = get_const_tuple(attrs.dilations)
    layout = attrs.data_layout
    kernel_layout = attrs.kernel_layout

    assert layout in ["NCHW", "NHWC"]
    (dilation_h, dilation_w) = dilations
    if dilation_h < 1 or dilation_w < 1:
        raise ValueError("dilation should be positive value")

    if layout == "NCHW":
        assert kernel_layout == "IHW"
        strategy.add_implementation(
            wrap_compute_dilation2d(topi.image.dilation2d_nchw),
            wrap_topi_schedule(topi.generic.schedule_dilation2d_nchw),
            name="dilation2d_nchw.generic")
    elif layout == "NHWC":
        assert kernel_layout == "HWI"
        strategy.add_implementation(
            wrap_compute_dilation2d(topi.image.dilation2d_nhwc),
            wrap_topi_schedule(topi.generic.schedule_dilation2d_nhwc),
            name="dilation2d_nhwc.generic")
    else:
        raise RuntimeError("Unsupported dilation2d layout {}".format(layout))
    return strategy
Example #24
def split_shape_func(attrs, inputs, _):
    """
    Shape function for split op.
    """
    if isinstance(attrs.indices_or_sections, (int, tvm.tir.IntImm)):
        indices_or_sections = get_const_int(attrs.indices_or_sections)
        assert indices_or_sections > 0, "Slice count must be > 0"
    else:
        indices_or_sections = list(get_const_tuple(attrs.indices_or_sections))
        assert sorted(
            indices_or_sections)[0] > 0 and indices_or_sections == sorted(
                indices_or_sections), "split_indices must be sorted"

    axis = get_const_int(attrs.axis)

    if axis < 0:
        axis += get_const_int(inputs[0].shape[0])

    num_out = (indices_or_sections if isinstance(indices_or_sections, int) else
               len(indices_or_sections) + 1)
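    # e.g. indices_or_sections=[2, 5] on an axis of length 12 yields outputs of length 2, 3 and 7 (num_out = 3)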
    if isinstance(indices_or_sections, int):
        indices_or_sections = [indices_or_sections]
    return [
        _split_shape_func(inputs[0], convert(i), convert(indices_or_sections),
                          convert(axis)) for i in range(num_out)
    ]
Example #25
def squeeze_shape_func(attrs, inputs, _):
    """
    Shape function for squeeze op.
    """
    axis = attrs.axis if attrs.axis is None else get_const_tuple(attrs.axis)
    keep_axes = []
    remove_axes = []
    if axis is not None:
        for i in range(inputs[0].shape[0].value):
            if i not in axis:
                keep_axes.append(i)
            else:
                remove_axes.append(i)

    # Due to the current Relay type system, it is possible that even
    # a static kernel function needs a shape function. To handle
    # this case, we allow axis to be None in squeeze shape func
    # for now.
    # TODO(kevinthesun): Enhance relay type system to avoid this.
    if keep_axes:
        out = _squeeze_shape_func(inputs[0], convert(keep_axes),
                                  convert(remove_axes))
    else:
        out = te.compute((), lambda *indices: 0)
    return [out]
Example #26
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not tvm.testing.device_enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.Target(device):
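            # the data and output layouts are blocked NCHW strings, e.g. ic_block=16 gives "NCHW16c"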
            C = topi.x86.conv2d_NCHWc(A, W, (stride, stride),
                                      (padding, padding), (dilation, dilation),
                                      'NCHW%dc' % ic_block,
                                      "NCHW%dc" % oc_block, dtype)
            s = topi.x86.schedule_conv2d_NCHWc([C])

        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype),
                         ctx)
        func = tvm.build(s, [A, W, C],
                         device,
                         name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                         (batch, in_channel, in_size, num_filter, kernel,
                          stride, padding, dilation))
        # print(tvm.lower(s, [A, W, C], simple_mode=True))
        func(a, w, c)
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-3)
Example #27
def verify_global_pool(dshape, pool_type, layout='NCHW'):
    """verify function of global_pool"""
    assert layout in ["NCHW", "NHWC"]
    A = te.placeholder(shape=dshape, name='A')
    B = topi.nn.global_pool(A, pool_type=pool_type, layout=layout)
    B = topi.nn.relu(B)

    a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)

    axis = (layout.find('H'), layout.find('W'))
    if pool_type == 'avg':
        b_np = np.mean(a_np, axis=axis, keepdims=True)
    elif pool_type == 'max':
        b_np = np.max(a_np, axis=axis, keepdims=True)
    b_np = np.maximum(b_np, 0.0)

    def check_device(device, ctx):
        print("Running on target: %s" % device)
        with tvm.target.Target(device):
            s_func = tvm.topi.testing.dispatch(device, _adaptive_pool_schedule)
            if device == "cuda":
                s = s_func(B, layout)
            else:
                s = s_func(B)
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        f = tvm.build(s, [A, B], device)
        f(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device, ctx in tvm.testing.enabled_targets():
        check_device(device, ctx)
Example #28
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not tvm.testing.device_enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        if not nvcc.have_tensorcore(ctx.compute_version):
            print("skip because gpu does not support Tensor Cores")
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv2d_nhwc_tensorcore_implement)
            C = fcompute(A, W, stride, padding, dilation, 'float32')
            if add_bias:
                C = topi.add(C, bias)
            if add_relu:
                C = topi.nn.relu(C)
            s = fschedule([C])

        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(b_np, ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
        if add_bias:
            func = tvm.build(s, [A, W, bias, C], device, name="relu_%d_%d_%d_%d_%d_%d_%d_%d" % (
                batch, in_channel, in_size, num_filter, kernel, stride, padding_sum, dilation))
            func(a, w, b, c)
        else:
            func = tvm.build(s, [A, W, C], device, name="relu_%d_%d_%d_%d_%d_%d_%d_%d" % (
                batch, in_channel, in_size, num_filter, kernel, stride, padding_sum, dilation))
            func(a, w, c)

        rtol = 1e-3
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=rtol)
Example #29
    def check_device(device):
        ctx = tvm.context(device, 0)
        if not tvm.testing.device_enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        if not nvcc.have_tensorcore(ctx.compute_version):
            print("skip because gpu does not support Tensor Cores")
            return
        print("Running on target: %s" % device)
        with tvm.target.Target(device):
            fcompute, fschedule = topi.testing.dispatch(device, _conv2d_hwnc_tensorcore_implement)
            C = fcompute(A, W, stride, padding, dilation, dtype, "int32")
            s = fschedule([C])

        a = tvm.nd.array(a_np.transpose((1, 2, 0, 3)), ctx)
        w = tvm.nd.array(w_np, ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)

        func = tvm.build(
            s,
            [A, W, C],
            device,
            name="relu_%d_%d_%d_%d_%d_%d_%d_%d"
            % (batch, in_channel, in_size, num_filter, kernel, stride, padding_sum, dilation),
        )
        func(a, w, c)

        rtol = 1e-3
        tvm.testing.assert_allclose(c.asnumpy().transpose((2, 0, 1, 3)), c_np, rtol=rtol)
Example #30
    def check_device(device, ctx):
        print("Running on target: %s" % device)
        fcompute, fschedule = tvm.topi.testing.dispatch(
            device, _conv3d_ncdhw_implement)
        with tvm.target.Target(device):
            C = fcompute(A, W, (stride, stride, stride), padding,
                         (dilation, dilation, dilation), dtype)
            if add_bias:
                C = topi.add(C, bias)
            if add_relu:
                C = topi.nn.relu(C)
            s = fschedule([C])

        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(b_np, ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype),
                         ctx)
        if add_bias:
            func = tvm.build(s, [A, W, bias, C],
                             device,
                             name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                             (batch, in_channel, in_size, num_filter, kernel,
                              stride, padding_sum, dilation))
            func(a, w, b, c)
        else:
            func = tvm.build(s, [A, W, C],
                             device,
                             name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                             (batch, in_channel, in_size, num_filter, kernel,
                              stride, padding_sum, dilation))
            func(a, w, c)
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-4)