def verify_conv3d_ndhwc(
    batch,
    in_channel,
    in_size,
    num_filter,
    kernel,
    stride,
    padding,
    dilation=1,
    add_bias=False,
    add_relu=False,
    devices="cuda",
):
    """Test the conv3d with tensorcore for ndhwc layout"""
    pad_front, pad_top, pad_left, pad_back, pad_bottom, pad_right = get_pad_tuple3d(
        padding, (kernel, kernel, kernel))
    padding_sum = pad_front + pad_top + pad_left + pad_back + pad_bottom + pad_right
    print("Workload: (%d, %d, %d, %d, %d, %d, %d, %d)" %
          (batch, in_channel, in_size, num_filter, kernel, stride, padding_sum,
           dilation))

    in_depth = in_height = in_width = in_size

    A = te.placeholder((batch, in_depth, in_height, in_width, in_channel),
                       name="A")
    W = te.placeholder((kernel, kernel, kernel, in_channel, num_filter),
                       name="W")
    bias = te.placeholder((1, 1, 1, 1, num_filter), name="bias")

    a_shape = get_const_tuple(A.shape)
    w_shape = get_const_tuple(W.shape)
    bias_shape = get_const_tuple(bias.shape)
    dtype = A.dtype

    @memoize("topi.tests.test_topi_conv3d_ndhwc.verify_conv3d_ndhwc")
    def get_ref_data():
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        w_np = np.random.uniform(size=w_shape).astype(dtype)
        b_np = np.random.uniform(size=bias_shape).astype(dtype)
        # dilate the three spatial axes of the DHWIO kernel
        dw_np = tvm.topi.testing.dilate_python(
            w_np, (dilation, dilation, dilation, 1, 1))
        c_np = tvm.topi.testing.conv3d_ndhwc_python(a_np, dw_np, stride,
                                                    padding)
        if add_bias:
            c_np += b_np
        if add_relu:
            c_np = np.maximum(c_np, 0)
        return a_np, w_np, b_np, c_np

    a_np, w_np, b_np, c_np = get_ref_data()

    def check_device(device):
        dev = tvm.device(device, 0)
        print("Running on target: %s" % device)
        with tvm.target.Target(device):
            fcompute, fschedule = tvm.topi.testing.dispatch(
                device, _conv3d_ndhwc_tensorcore_implement)
            C = fcompute(A, W, stride, padding, dilation, "float32")
            if add_bias:
                C = topi.add(C, bias)
            if add_relu:
                C = topi.nn.relu(C)
            s = fschedule([C])

        a = tvm.nd.array(a_np, dev)
        w = tvm.nd.array(w_np, dev)
        b = tvm.nd.array(b_np, dev)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype),
                         dev)
        if add_bias:
            func = tvm.build(
                s,
                [A, W, bias, C],
                device,
                name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                (batch, in_channel, in_size, num_filter, kernel, stride,
                 padding_sum, dilation),
            )
            func(a, w, b, c)
        else:
            func = tvm.build(
                s,
                [A, W, C],
                device,
                name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                (batch, in_channel, in_size, num_filter, kernel, stride,
                 padding_sum, dilation),
            )
            func(a, w, c)

        rtol = 1e-3
        tvm.testing.assert_allclose(c.numpy(), c_np, rtol=rtol)

    check_device(devices)
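

# A minimal invocation sketch, not from the original test suite: assumed
# workloads whose batch and channel counts are multiples of 16, which the
# tensor core schedules typically require, on a GPU of compute capability
# 7.0 or newer.
def test_conv3d_ndhwc_tensorcore():
    verify_conv3d_ndhwc(16, 16, 14, 16, 3, 1, "SAME")
    verify_conv3d_ndhwc(16, 64, 12, 32, 3, 1, 1, add_bias=True, add_relu=True)
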
def conv3d_transpose_ncdhw_python(a_np, w_np, stride, padding, output_padding):
    """Transposed 3d convolution operator in NCDHW layout.

    Parameters
    ----------
    a_np : numpy.ndarray
        5-D with shape [batch, in_channel, in_depth, in_height, in_width]

    w_np : numpy.ndarray
        5-D with shape [in_channel, num_filter, filter_depth, filter_height, filter_width]

    stride : int or a list/tuple of three ints
        Stride size, or [stride_depth, stride_height, stride_width]

    padding : int or str
        Padding size

    output_padding : int or list/tuple of three ints
        Used to disambiguate output shape.

    Returns
    -------
    b_np : np.ndarray
        5-D with shape [batch, out_channel, out_depth, out_height, out_width]
    """
    batch, in_c, in_d, in_h, in_w = a_np.shape
    _, out_c, filter_d, filter_h, filter_w = w_np.shape
    if isinstance(stride, int):
        stride_d = stride_h = stride_w = stride
    else:
        stride_d, stride_h, stride_w = stride
    if isinstance(output_padding, int):
        opad_d = opad_h = opad_w = output_padding
    else:
        opad_d, opad_h, opad_w = output_padding
    assert opad_d < stride_d and opad_h < stride_h and opad_w < stride_w

    # dilate stage
    dilated_a_np = tvm.topi.testing.dilate_python(
        a_np, [1, 1, stride_d, stride_h, stride_w])

    # padding stage
    fpad_front, fpad_top, fpad_left, fpad_back, fpad_bottom, fpad_right = get_pad_tuple3d(
        padding, (filter_d, filter_h, filter_w))

    bpad_front = filter_d - 1 - fpad_front
    bpad_back = filter_d - 1 - fpad_back + opad_d
    bpad_top = filter_h - 1 - fpad_top
    bpad_bottom = filter_h - 1 - fpad_bottom + opad_h
    bpad_left = filter_w - 1 - fpad_left
    bpad_right = filter_w - 1 - fpad_right + opad_w

    padded_a_np = np.zeros((
        batch,
        in_c,
        dilated_a_np.shape[2] + bpad_front + bpad_back,
        dilated_a_np.shape[3] + bpad_top + bpad_bottom,
        dilated_a_np.shape[4] + bpad_left + bpad_right,
    ), dtype=a_np.dtype)  # keep the input dtype instead of upcasting to float64

    padded_a_np[:, :, bpad_front:dilated_a_np.shape[2] + bpad_front,
                bpad_top:dilated_a_np.shape[3] + bpad_top,
                bpad_left:dilated_a_np.shape[4] + bpad_left] = dilated_a_np

    # convolution stage
    # expected output shape; the valid convolution below yields exactly this
    out_d = (in_d - 1) * stride_d - fpad_front - fpad_back + filter_d + opad_d
    out_h = (in_h - 1) * stride_h - fpad_top - fpad_bottom + filter_h + opad_h
    out_w = (in_w - 1) * stride_w - fpad_left - fpad_right + filter_w + opad_w

    w_np = np.flip(w_np, axis=[2, 3, 4]).transpose((1, 0, 2, 3, 4))
    b_np = tvm.topi.testing.conv3d_ncdhw_python(padded_a_np,
                                                w_np,
                                                stride=(1, 1, 1),
                                                padding=(0, 0, 0))

    return b_np
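
# A small shape sanity check, not part of the original module (the helper
# name is hypothetical): for each spatial axis the transposed convolution
# should satisfy
#     out = (in - 1) * stride - pad_front - pad_back + filter + output_padding
def _check_conv3d_transpose_shape():
    a = np.random.uniform(size=(1, 4, 5, 5, 5)).astype("float32")
    w = np.random.uniform(size=(4, 8, 3, 3, 3)).astype("float32")
    b = conv3d_transpose_ncdhw_python(a, w, stride=2, padding=(1, 1, 1),
                                      output_padding=(1, 1, 1))
    # each spatial axis: (5 - 1) * 2 - 1 - 1 + 3 + 1 = 10
    assert b.shape == (1, 8, 10, 10, 10)
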
def verify_conv3d_ncdhw(batch,
                        in_channel,
                        in_size,
                        num_filter,
                        kernel,
                        stride,
                        padding,
                        dilation=1,
                        add_bias=False,
                        add_relu=False,
                        use_cudnn=False):
    pad_front, pad_top, pad_left, pad_back, pad_bottom, pad_right = get_pad_tuple3d(
        padding, (kernel, kernel, kernel))
    padding_sum = pad_front + pad_back + pad_top + pad_left + pad_bottom + pad_right
    print("Workload: (%d, %d, %d, %d, %d, %d, %d, %d)" %
          (batch, in_channel, in_size, num_filter, kernel, stride, padding_sum,
           dilation))

    in_depth = in_height = in_width = in_size

    A = te.placeholder((batch, in_channel, in_depth, in_height, in_width),
                       name="A")
    W = te.placeholder((num_filter, in_channel, kernel, kernel, kernel),
                       name="W")
    bias = te.placeholder((num_filter, 1, 1, 1), name="bias")

    a_shape = get_const_tuple(A.shape)
    w_shape = get_const_tuple(W.shape)
    bias_shape = get_const_tuple(bias.shape)
    dtype = A.dtype

    @memoize("topi.tests.test_topi_conv3d_ncdhw.verify_conv3d_ncdhw")
    def get_ref_data():
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        w_np = np.random.uniform(size=w_shape).astype(dtype)
        b_np = np.random.uniform(size=bias_shape).astype(dtype)
        dw_np = tvm.topi.testing.dilate_python(
            w_np, (1, 1, dilation, dilation, dilation))
        c_np = tvm.topi.testing.conv3d_ncdhw_python(a_np, dw_np, stride,
                                                    padding)
        if add_bias:
            c_np += b_np
        if add_relu:
            c_np = np.maximum(c_np, 0)
        return a_np, w_np, b_np, c_np

    a_np, w_np, b_np, c_np = get_ref_data()

    def check_device(device, ctx):
        print("Running on target: %s" % device)
        if "cudnn" in device:
            fcompute, fschedule = topi.cuda.conv3d_cudnn, topi.cuda.schedule_conv3d_cudnn
        else:
            fcompute, fschedule = tvm.topi.testing.dispatch(
                device, _conv3d_ncdhw_implement)
        with tvm.target.Target(device):
            if "cudnn" in device:
                C = fcompute(A, W, (stride, stride, stride), padding,
                             (dilation, dilation, dilation), "NCDHW", dtype)
            else:
                C = fcompute(A, W, (stride, stride, stride), padding,
                             (dilation, dilation, dilation), dtype)
            if add_bias:
                C = topi.add(C, bias)
            if add_relu:
                C = topi.nn.relu(C)
            s = fschedule([C])

        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(b_np, ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype),
                         ctx)
        if add_bias:
            func = tvm.build(
                s,
                [A, W, bias, C],
                device,
                name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                (batch, in_channel, in_size, num_filter, kernel, stride,
                 padding_sum, dilation),
            )
            func(a, w, b, c)
        else:
            func = tvm.build(
                s,
                [A, W, C],
                device,
                name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                (batch, in_channel, in_size, num_filter, kernel, stride,
                 padding_sum, dilation),
            )
            func(a, w, c)
        tvm.testing.assert_allclose(c.numpy(), c_np, rtol=1e-4)

    for device, ctx in tvm.testing.enabled_targets():
        with autotvm.tophub.context(
                device):  # load tophub pre-tuned parameters
            check_device(device, ctx)
    if use_cudnn:
        check_device("cuda -model=unknown -libs=cudnn",
                     tvm.device("cuda -model=unknown -libs=cudnn", 0))
def verify_conv3d_ncdhw(
    batch,
    in_channel,
    in_size,
    num_filter,
    kernel,
    stride,
    padding,
    dilation=1,
    groups=1,
    add_bias=False,
    add_relu=False,
):
    if isinstance(kernel, (tuple, list)):
        if len(kernel) == 3:
            kernel_d = kernel[0]
            kernel_h = kernel[1]
            kernel_w = kernel[2]
        else:
            raise ValueError("Size of kernel can only be 3")
    elif isinstance(kernel, int):
        kernel_d = kernel_h = kernel_w = kernel
    else:
        raise ValueError("Unknown kernel option %s" % kernel)
    pad_front, pad_top, pad_left, pad_back, pad_bottom, pad_right = get_pad_tuple3d(
        padding, (kernel_d, kernel_h, kernel_w)
    )
    padding_sum = pad_front + pad_back + pad_top + pad_left + pad_bottom + pad_right
    print(
        "Workload: (%d, %d, %d, %d, %d, %d, %d, %d, %d, %d)"
        % (
            batch,
            in_channel,
            in_size,
            num_filter,
            kernel_d,
            kernel_h,
            kernel_w,
            stride,
            padding_sum,
            dilation,
        )
    )

    in_depth = in_height = in_width = in_size

    A = te.placeholder((batch, in_channel, in_depth, in_height, in_width), name="A")
    W = te.placeholder((num_filter, in_channel // groups, kernel_d, kernel_h, kernel_w), name="W")
    bias = te.placeholder((num_filter, 1, 1, 1), name="bias")

    a_shape = get_const_tuple(A.shape)
    w_shape = get_const_tuple(W.shape)
    bias_shape = get_const_tuple(bias.shape)
    dtype = A.dtype

    @memoize("topi.tests.test_topi_conv3d_ncdhw.verify_conv3d_ncdhw")
    def get_ref_data():
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        w_np = np.random.uniform(size=w_shape).astype(dtype)
        b_np = np.random.uniform(size=bias_shape).astype(dtype)
        dw_np = tvm.topi.testing.dilate_python(w_np, (1, 1, dilation, dilation, dilation))
        c_np = tvm.topi.testing.conv3d_ncdhw_python(a_np, dw_np, stride, padding, groups)
        if add_bias:
            c_np += b_np
        if add_relu:
            c_np = np.maximum(c_np, 0)
        return a_np, w_np, b_np, c_np

    a_np, w_np, b_np, c_np = get_ref_data()

    def check_target(target, dev):
        print("Running on target: %s" % target)
        fcompute, fschedule = tvm.topi.testing.dispatch(target, _conv3d_ncdhw_implement)
        with tvm.target.Target(target):
            C = fcompute(
                A,
                W,
                (stride, stride, stride),
                padding,
                (dilation, dilation, dilation),
                groups,
                dtype,
            )
            if add_bias:
                C = topi.add(C, bias)
            if add_relu:
                C = topi.nn.relu(C)
            s = fschedule([C])

        a = tvm.nd.array(a_np, dev)
        w = tvm.nd.array(w_np, dev)
        b = tvm.nd.array(b_np, dev)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), dev)
        if add_bias:
            func = tvm.build(
                s,
                [A, W, bias, C],
                target,
                name="relu_%d_%d_%d_%d_%d_%d_%d_%d_%d_%d_%d"
                % (
                    batch,
                    in_channel,
                    in_size,
                    num_filter,
                    kernel_d,
                    kernel_h,
                    kernel_w,
                    stride,
                    padding_sum,
                    dilation,
                    groups,
                ),
            )
            func(a, w, b, c)
        else:
            func = tvm.build(
                s,
                [A, W, C],
                target,
                name="relu_%d_%d_%d_%d_%d_%d_%d_%d_%d_%d_%d"
                % (
                    batch,
                    in_channel,
                    in_size,
                    num_filter,
                    kernel_d,
                    kernel_h,
                    kernel_w,
                    stride,
                    padding_sum,
                    dilation,
                    groups,
                ),
            )
            func(a, w, c)
        tvm.testing.assert_allclose(c.numpy(), c_np, rtol=1e-4, atol=1e-6)

    for target, dev in tvm.testing.enabled_targets():
        with autotvm.tophub.context(target):  # load tophub pre-tuned parameters
            check_target(target, dev)
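
# A grouped-convolution sketch with assumed sizes: in_channel and num_filter
# must both be divisible by `groups`, matching the weight shape
# (num_filter, in_channel // groups, kernel_d, kernel_h, kernel_w) above.
def test_group_conv3d_ncdhw():
    verify_conv3d_ncdhw(1, 32, 9, 64, 3, 1, 1, groups=4)
    verify_conv3d_ncdhw(1, 32, 9, 64, 3, 1, 1, groups=4, add_bias=True,
                        add_relu=True)
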
def _conv3d_ndhwc_python(a_np, w_np, stride, padding):
    """Convolution 3D operator in NDHWC layout.

    Parameters
    ----------
    a_np : numpy.ndarray
        5-D with shape [batch, in_depth, in_height, in_width, in_channel]

    w_np : numpy.ndarray
        5-D with shape [filter_depth, filter_height, filter_width, in_channel, num_filter]

    stride : int or a list/tuple of three ints
        Stride size, or [stride_depth, stride_height, stride_width]

    padding : int or str or a list/tuple of three ints
        Padding size, or ['VALID', 'SAME'], or [pad_depth, pad_height, pad_width]

    Returns
    -------
    b_np : np.ndarray
        5-D with shape [batch, out_depth, out_height, out_width, out_channel]
    """
    batch, in_depth, in_height, in_width, in_channel = a_np.shape
    kernel_d, kernel_h, kernel_w, _, num_filter = w_np.shape
    if isinstance(stride, int):
        stride_d = stride_h = stride_w = stride
    else:
        stride_d, stride_h, stride_w = stride

    pad_front, pad_top, pad_left, pad_back, pad_bottom, pad_right = get_pad_tuple3d(
        padding, (kernel_d, kernel_h, kernel_w)
    )
    pad_d = pad_front + pad_back
    pad_h = pad_top + pad_bottom
    pad_w = pad_left + pad_right
    # compute the output shape
    out_channel = num_filter
    out_depth = (in_depth - kernel_d + pad_d) // stride_d + 1
    out_height = (in_height - kernel_h + pad_h) // stride_h + 1
    out_width = (in_width - kernel_w + pad_w) // stride_w + 1
    # change the layout from NDHWC to NCDHW
    at = a_np.transpose((0, 4, 1, 2, 3))
    wt = w_np.transpose((4, 3, 0, 1, 2))
    bt = np.zeros((batch, out_channel, out_depth, out_height, out_width), dtype=a_np.dtype)
    # computation
    for n in range(batch):
        for f in range(out_channel):
            for c in range(in_channel):
                if pad_d > 0 or pad_h > 0 or pad_w > 0:
                    apad = np.zeros(
                        (in_depth + pad_d, in_height + pad_h, in_width + pad_w), dtype=a_np.dtype
                    )
                    apad[
                        pad_front : pad_front + in_depth,
                        pad_top : pad_top + in_height,
                        pad_left : pad_left + in_width,
                    ] = at[n, c]
                else:
                    apad = at[n, c]
                out = scipy.signal.convolve(apad, np.flip(wt[f, c]), mode="valid")
                bt[n, f] += out[::stride_d, ::stride_h, ::stride_w]
    return bt.transpose((0, 2, 3, 4, 1))
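
# A quick self-check sketch (hypothetical helper, assumed shapes): with "SAME"
# padding and unit stride the spatial extents are preserved, and NDHWC keeps
# the channel axis last.
def _check_conv3d_ndhwc_python():
    a = np.random.uniform(size=(1, 8, 8, 8, 4)).astype("float32")
    w = np.random.uniform(size=(3, 3, 3, 4, 16)).astype("float32")
    out = _conv3d_ndhwc_python(a, w, stride=1, padding="SAME")
    assert out.shape == (1, 8, 8, 8, 16)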