Example #1
# Common imports for these excerpts (module paths follow recent TVM releases;
# e.g. older releases spell tvm.topi.utils as tvm.topi.util).
import numpy as np
import scipy.signal

import tvm
import tvm.topi.testing
from tvm import te, topi, autotvm
from tvm.contrib.pickle_memoize import memoize
from tvm.topi.nn.utils import get_pad_tuple3d
from tvm.topi.utils import get_const_tuple

# _conv3d_ncdhw_implement (used below) is a target -> (compute, schedule)
# dispatch table defined elsewhere in the original test module.
def _conv3d_ncdhw_python(a_np, w_np, stride, padding):
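    """NumPy/SciPy reference implementation of 3-D convolution in NCDHW layout."""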
    batch, in_channel, in_depth, in_height, in_width = a_np.shape
    num_filter, _, kernel_d, kernel_h, kernel_w = w_np.shape
    if isinstance(stride, int):
        stride_d = stride_h = stride_w = stride
    else:
        stride_d, stride_h, stride_w = stride

    pad_front, pad_top, pad_left, pad_back, pad_bottom, pad_right = \
        get_pad_tuple3d(padding, (kernel_d, kernel_h, kernel_w))
    pad_d = pad_front + pad_back
    pad_h = pad_top + pad_bottom
    pad_w = pad_left + pad_right

    # compute the output shape
    out_channel = num_filter
    out_depth = (in_depth - kernel_d + pad_d) // stride_d + 1
    out_height = (in_height - kernel_h + pad_h) // stride_h + 1
    out_width = (in_width - kernel_w + pad_w) // stride_w + 1
    b_np = np.zeros((batch, out_channel, out_depth, out_height, out_width))
    # computation
    for n in range(batch):
        for f in range(out_channel):
            for c in range(in_channel):
                if pad_d > 0 or pad_h > 0 or pad_w > 0:
                    apad = np.zeros((in_depth + pad_d, in_height + pad_h, in_width + pad_w))
                    apad[pad_front:pad_front + in_depth, pad_top:pad_top + in_height,\
                         pad_left:pad_left + in_width] = a_np[n, c]
                else:
                    apad = a_np[n, c]
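                # scipy.signal.convolve flips the kernel internally, so
                # pre-flipping with np.flip yields the cross-correlation that
                # deep-learning "convolution" actually computes.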
                out = scipy.signal.convolve(
                    apad, np.flip(w_np[f, c]), mode='valid')
                b_np[n, f] += out[::stride_d, ::stride_h, ::stride_w]
    return b_np
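
The only subtle step above is the np.flip before scipy.signal.convolve: scipy
computes true convolution (kernel reversed), so flipping the kernel first turns
it into cross-correlation. A minimal self-contained sketch of that identity
(pure NumPy/SciPy, independent of the TVM helpers):

import numpy as np
import scipy.signal

x = np.random.rand(4, 5, 6)
k = np.random.rand(2, 3, 3)

# Cross-correlation via scipy: the pre-flip cancels scipy's internal flip.
fast = scipy.signal.convolve(x, np.flip(k), mode="valid")

# The same result with explicit loops over every output position.
slow = np.zeros((4 - 2 + 1, 5 - 3 + 1, 6 - 3 + 1))
for d in range(slow.shape[0]):
    for h in range(slow.shape[1]):
        for w in range(slow.shape[2]):
            slow[d, h, w] = np.sum(x[d:d + 2, h:h + 3, w:w + 3] * k)

np.testing.assert_allclose(fast, slow, rtol=1e-7)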
def verify_conv3d_ncdhw(batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation=1, add_bias=False, add_relu=False):
    pad_front, pad_top, pad_left, pad_back, pad_bottom, pad_right = get_pad_tuple3d(padding, (kernel, kernel, kernel))
    padding_sum = pad_front + pad_back + pad_top + pad_left + pad_bottom + pad_right
    print("Workload: (%d, %d, %d, %d, %d, %d, %d, %d)" % (batch, in_channel, in_size, num_filter, kernel, stride,
          padding_sum, dilation))

    in_depth = in_height = in_width = in_size

    A = te.placeholder((batch, in_channel, in_depth, in_height, in_width), name='A')
    W = te.placeholder((num_filter, in_channel, kernel, kernel, kernel), name='W')
    bias = te.placeholder((num_filter, 1, 1, 1), name='bias')

    a_shape = get_const_tuple(A.shape)
    w_shape = get_const_tuple(W.shape)
    bias_shape = get_const_tuple(bias.shape)
    dtype = A.dtype

    @memoize("topi.tests.test_topi_conv3d_ncdhw.verify_conv3d_ncdhw")
    def get_ref_data():
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        w_np = np.random.uniform(size=w_shape).astype(dtype)
        b_np = np.random.uniform(size=bias_shape).astype(dtype)
        dw_np = tvm.topi.testing.dilate_python(w_np, (1, 1, dilation, dilation, dilation))
        c_np = tvm.topi.testing.conv3d_ncdhw_python(a_np, dw_np, stride, padding)
        if add_bias:
            c_np += b_np
        if add_relu:
            c_np = np.maximum(c_np, 0)
        return a_np, w_np, b_np, c_np

    a_np, w_np, b_np, c_np = get_ref_data()

    def check_device(device, ctx):
        print("Running on target: %s" % device)
        fcompute, fschedule = tvm.topi.testing.dispatch(device, _conv3d_ncdhw_implement)
        with tvm.target.create(device):
            C = fcompute(A, W, (stride, stride, stride), padding,
                         (dilation, dilation, dilation), dtype)
            if add_bias:
                C = topi.add(C, bias)
            if add_relu:
                C = topi.nn.relu(C)
            s = fschedule([C])

        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(b_np, ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), ctx)
        if add_bias:
            func = tvm.build(s, [A, W, bias, C], device, name="relu_%d_%d_%d_%d_%d_%d_%d_%d" % (batch, in_channel, in_size, num_filter, kernel, stride, padding_sum, dilation))
            func(a, w, b, c)
        else:
            func = tvm.build(s, [A, W, C], device, name="relu_%d_%d_%d_%d_%d_%d_%d_%d" % (batch, in_channel, in_size, num_filter, kernel, stride, padding_sum, dilation))
            func(a, w, c)
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-4)

    for device, ctx in tvm.testing.enabled_targets():
        with autotvm.tophub.context(device):  # load tophub pre-tuned parameters
            check_device(device, ctx)
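
For reference, the harness above is driven with concrete workloads. The calls
below are illustrative, in the style of TVM's own test entry point (the exact
upstream workload list may differ):

def test_conv3d_ncdhw():
    verify_conv3d_ncdhw(1, 32, 32, 5, 1, 1, 0)
    verify_conv3d_ncdhw(1, 64, 32, 64, 3, 1, 1)
    verify_conv3d_ncdhw(1, 64, 32, 64, 3, 2, 1, add_bias=True, add_relu=True)
    verify_conv3d_ncdhw(1, 64, 32, 64, 3, 1, 1, dilation=2)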
Example #3
def conv3d_ndhwc_python(a_np, w_np, stride, padding):
    """Convolution 3D operator in NDHWC layout.

    Parameters
    ----------
    a_np : numpy.ndarray
        5-D with shape [batch, in_depth, in_height, in_width, in_channel]

    w_np : numpy.ndarray
        5-D with shape [filter_depth, filter_height, filter_width, in_channel, num_filter]

    stride : int or a list/tuple of three ints
        Stride size, or [stride_depth, stride_height, stride_width]

    padding : int or str or a list/tuple of three ints
        Padding size, or ['VALID', 'SAME'], or [pad_depth, pad_height, pad_width]

    Returns
    -------
    b_np : np.ndarray
        5-D with shape [batch, out_depth, out_height, out_width, out_channel]
    """
    batch, in_depth, in_height, in_width, in_channel = a_np.shape
    kernel_d, kernel_h, kernel_w, _, num_filter = w_np.shape
    if isinstance(stride, int):
        stride_d = stride_h = stride_w = stride
    else:
        stride_d, stride_h, stride_w = stride

    pad_front, pad_top, pad_left, pad_back, pad_bottom, pad_right = get_pad_tuple3d(
        padding, (kernel_d, kernel_h, kernel_w)
    )
    pad_d = pad_front + pad_back
    pad_h = pad_top + pad_bottom
    pad_w = pad_left + pad_right
    # compute the output shape
    out_channel = num_filter
    out_depth = (in_depth - kernel_d + pad_d) // stride_d + 1
    out_height = (in_height - kernel_h + pad_h) // stride_h + 1
    out_width = (in_width - kernel_w + pad_w) // stride_w + 1
    # change the layout from NDHWC to NCDHW
    at = a_np.transpose((0, 4, 1, 2, 3))
    wt = w_np.transpose((4, 3, 0, 1, 2))
    bt = np.zeros((batch, out_channel, out_depth, out_height, out_width))
    # computation
    for n in range(batch):
        for f in range(out_channel):
            for c in range(in_channel):
                if pad_d > 0 or pad_h > 0 or pad_w > 0:
                    apad = np.zeros((in_depth + pad_d, in_height + pad_h, in_width + pad_w))
                    apad[
                        pad_front : pad_front + in_depth,
                        pad_top : pad_top + in_height,
                        pad_left : pad_left + in_width,
                    ] = at[n, c]
                else:
                    apad = at[n, c]
                out = scipy.signal.convolve(apad, np.flip(wt[f, c]), mode="valid")
                bt[n, f] += out[::stride_d, ::stride_h, ::stride_w]
    return bt.transpose((0, 2, 3, 4, 1))
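
A quick shape sanity check, assuming conv3d_ndhwc_python and its imports are
in scope: with "SAME" padding and stride 1 the spatial extents are preserved,
so only the channel count changes.

a = np.random.uniform(size=(1, 8, 8, 8, 4)).astype("float32")  # NDHWC input
w = np.random.uniform(size=(3, 3, 3, 4, 2)).astype("float32")  # DHWIO filter
out = conv3d_ndhwc_python(a, w, stride=1, padding="SAME")
assert out.shape == (1, 8, 8, 8, 2)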
def conv3d_transpose_ncdhw_python(a_np, w_np, stride, padding, output_padding):
    """Transposed 3d convolution operator in NCDHW layout.

    Parameters
    ----------
    a_np : numpy.ndarray
        5-D with shape [batch, in_channel, in_depth, in_height, in_width]

    w_np : numpy.ndarray
        5-D with shape [in_channel, num_filter, filter_depth, filter_height, filter_width]

    stride : int or a list/tuple of three ints
        Stride size, or [stride_depth, stride_height, stride_width]

    padding : int or str
        Padding size

    output_padding : int or list/tuple of three ints
        Used to disambiguate output shape.

    Returns
    -------
    b_np : np.ndarray
        5-D with shape [batch, out_channel, out_depth, out_height, out_width]
    """
    batch, in_c, in_d, in_h, in_w = a_np.shape
    _, out_c, filter_d, filter_h, filter_w = w_np.shape
    if isinstance(stride, int):
        stride_d = stride_h = stride_w = stride
    else:
        stride_d, stride_h, stride_w = stride
    if isinstance(output_padding, int):
        opad_d = opad_h = opad_w = output_padding
    else:
        opad_d, opad_h, opad_w = output_padding
    assert opad_d < stride_d and opad_h < stride_h and opad_w < stride_w

    # dilate stage
    dilated_a_np = tvm.topi.testing.dilate_python(
        a_np, [1, 1, stride_d, stride_h, stride_w])

    # padding stage
    fpad_front, fpad_top, fpad_left, fpad_back, fpad_bottom, fpad_right = get_pad_tuple3d(
        padding, (filter_d, filter_h, filter_w))

    bpad_front = filter_d - 1 - fpad_front
    bpad_back = filter_d - 1 - fpad_back + opad_d
    bpad_top = filter_h - 1 - fpad_top
    bpad_bottom = filter_h - 1 - fpad_bottom + opad_h
    bpad_left = filter_w - 1 - fpad_left
    bpad_right = filter_w - 1 - fpad_right + opad_w

    padded_a_np = np.zeros((
        batch,
        in_c,
        dilated_a_np.shape[2] + bpad_front + bpad_back,
        dilated_a_np.shape[3] + bpad_top + bpad_bottom,
        dilated_a_np.shape[4] + bpad_left + bpad_right,
    ))

    padded_a_np[
        :,
        :,
        bpad_front : dilated_a_np.shape[2] + bpad_front,
        bpad_top : dilated_a_np.shape[3] + bpad_top,
        bpad_left : dilated_a_np.shape[4] + bpad_left,
    ] = dilated_a_np

    # convolution stage: these are the expected output extents of the
    # transposed convolution (the stride-1 valid convolution below yields
    # exactly this shape)
    out_d = (in_d - 1) * stride_d - fpad_front - fpad_back + filter_d + opad_d
    out_h = (in_h - 1) * stride_h - fpad_top - fpad_bottom + filter_h + opad_h
    out_w = (in_w - 1) * stride_w - fpad_left - fpad_right + filter_w + opad_w

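    # A transposed convolution equals a regular convolution with the spatially
    # flipped kernel and the input/output channel axes swapped.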
    w_np = np.flip(w_np, axis=[2, 3, 4]).transpose((1, 0, 2, 3, 4))
    b_np = tvm.topi.testing.conv3d_ncdhw_python(padded_a_np,
                                                w_np,
                                                stride=(1, 1, 1),
                                                padding=(0, 0, 0))

    return b_np
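
As a concrete check of the shape arithmetic (assuming the function above and
its TVM helpers are importable): with in_d = 4, stride 2, a 3-tap filter,
padding 1, and output_padding 1, the formula gives
(4 - 1) * 2 - 1 - 1 + 3 + 1 = 8 per spatial axis.

a = np.random.uniform(size=(1, 3, 4, 4, 4)).astype("float32")  # NCDHW input
w = np.random.uniform(size=(3, 6, 3, 3, 3)).astype("float32")  # IODHW filter
out = conv3d_transpose_ncdhw_python(a, w, stride=2, padding=1, output_padding=1)
assert out.shape == (1, 6, 8, 8, 8)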
Example #5
def verify_conv3d_ndhwc(
    batch,
    in_channel,
    in_size,
    num_filter,
    kernel,
    stride,
    padding,
    dilation=1,
    add_bias=False,
    add_relu=False,
    devices="cuda",
):
    """Test the conv3d with tensorcore for ndhwc layout"""
    pad_front, pad_top, pad_left, pad_back, pad_bottom, pad_right = get_pad_tuple3d(
        padding, (kernel, kernel, kernel))
    padding_sum = pad_front + pad_top + pad_left + pad_back + pad_bottom + pad_right
    print("Workload: (%d, %d, %d, %d, %d, %d, %d, %d)" %
          (batch, in_channel, in_size, num_filter, kernel, stride, padding_sum,
           dilation))

    in_depth = in_height = in_width = in_size

    A = te.placeholder((batch, in_depth, in_height, in_width, in_channel),
                       name="A")
    W = te.placeholder((kernel, kernel, kernel, in_channel, num_filter),
                       name="W")
    bias = te.placeholder((1, 1, 1, 1, num_filter), name="bias")

    a_shape = get_const_tuple(A.shape)
    w_shape = get_const_tuple(W.shape)
    bias_shape = get_const_tuple(bias.shape)
    dtype = A.dtype

    @memoize("topi.tests.test_topi_conv3d_ndhwc.verify_conv3d_ndhwc")
    def get_ref_data():
        a_np = np.random.uniform(size=a_shape).astype(dtype)
        w_np = np.random.uniform(size=w_shape).astype(dtype)
        b_np = np.random.uniform(size=bias_shape).astype(dtype)
        # dilate only the three spatial axes of the DHWIO kernel
        dw_np = tvm.topi.testing.dilate_python(
            w_np, (dilation, dilation, dilation, 1, 1))
        c_np = tvm.topi.testing.conv3d_ndhwc_python(a_np, dw_np, stride,
                                                    padding)
        if add_bias:
            c_np += b_np
        if add_relu:
            c_np = np.maximum(c_np, 0)
        return a_np, w_np, b_np, c_np

    a_np, w_np, b_np, c_np = get_ref_data()

    def check_device(device):
        ctx = tvm.context(device, 0)
        # Skip guards in the style of TVM's upstream tensorcore tests
        # (assumes `from tvm.contrib import nvcc`).
        if not tvm.testing.device_enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        if not nvcc.have_tensorcore(ctx.compute_version):
            print("Skip because the GPU does not support Tensor Cores")
            return
        print("Running on target: %s" % device)
        with tvm.target.Target(device):
            fcompute, fschedule = tvm.topi.testing.dispatch(
                device, _conv3d_ndhwc_tensorcore_implement)
            C = fcompute(A, W, stride, padding, dilation, "float32")
            if add_bias:
                C = topi.add(C, bias)
            if add_relu:
                C = topi.nn.relu(C)
            s = fschedule([C])

        a = tvm.nd.array(a_np, ctx)
        w = tvm.nd.array(w_np, ctx)
        b = tvm.nd.array(b_np, ctx)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype),
                         ctx)
        if add_bias:
            func = tvm.build(
                s,
                [A, W, bias, C],
                device,
                name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                (batch, in_channel, in_size, num_filter, kernel, stride,
                 padding_sum, dilation),
            )
            func(a, w, b, c)
        else:
            func = tvm.build(
                s,
                [A, W, C],
                device,
                name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                (batch, in_channel, in_size, num_filter, kernel, stride,
                 padding_sum, dilation),
            )
            func(a, w, c)

        rtol = 1e-3
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=rtol)

    check_device(devices)
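
Typical invocations keep batch and channel counts at multiples of 16 so the
NDHWC workload maps onto Tensor Core fragment shapes. These calls are
illustrative, in the style of TVM's upstream entry point:

def test_conv3d_ndhwc_tensorcore():
    verify_conv3d_ndhwc(16, 16, 14, 16, 3, 1, 1)
    verify_conv3d_ndhwc(16, 64, 7, 64, 7, 1, 3)
    verify_conv3d_ndhwc(16, 16, 17, 16, 7, 1, 3, add_bias=True)
    verify_conv3d_ndhwc(16, 16, 17, 16, 7, 1, 3, add_relu=True)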