Пример #1
0
def get_conv_bias_desc(x):
    """Build the cuDNN tensor descriptor used for a convolution bias.

    The bias ``x`` is described as an NCHW tensor of shape
    ``(1, x.size, 1, 1)`` whose data type is looked up in ``_dtypes`` by
    ``x.dtype``.

    Returns:
        An ``Auto`` object constructed from the descriptor and
        ``libcudnn.cudnnDestroyTensorDescriptor``.
    """
    handle = libcudnn.cudnnCreateTensorDescriptor()
    layout = libcudnn.cudnnTensorFormat['CUDNN_TENSOR_NCHW']
    data_type = _dtypes[x.dtype]
    # All elements of the bias are placed on the second (channel) axis.
    bias_dims = (1, x.size, 1, 1)
    libcudnn.cudnnSetTensor4dDescriptor(handle, layout, data_type, *bias_dims)
    return Auto(handle, libcudnn.cudnnDestroyTensorDescriptor)
Пример #2
0
def get_conv_bias_desc(x):
    """Return a descriptor for the bias array ``x`` of a convolution.

    A new cuDNN tensor descriptor is configured as a 4-d NCHW tensor with
    dimensions ``(1, x.size, 1, 1)`` and the data type ``_dtypes[x.dtype]``,
    then handed to ``Auto`` together with
    ``libcudnn.cudnnDestroyTensorDescriptor``.
    """
    bias_desc = libcudnn.cudnnCreateTensorDescriptor()
    nchw = libcudnn.cudnnTensorFormat['CUDNN_TENSOR_NCHW']
    # n = 1, c = number of bias elements, h = w = 1.
    n, c, h, w = 1, x.size, 1, 1
    libcudnn.cudnnSetTensor4dDescriptor(
        bias_desc, nchw, _dtypes[x.dtype], n, c, h, w)
    return Auto(bias_desc, libcudnn.cudnnDestroyTensorDescriptor)
Пример #3
0
def get_tensor_desc(x, h, w, form='CUDNN_TENSOR_NCHW'):
    """Build a 4-d cuDNN tensor descriptor that views ``x`` as (n, c, h, w).

    ``n`` is the leading dimension of ``x`` (1 when ``x.shape`` is empty)
    and ``c`` is ``x.size`` floor-divided by ``n * h * w``.

    Args:
        x: Array providing ``shape``, ``size`` and ``dtype``.
        h: Height passed to the descriptor.
        w: Width passed to the descriptor.
        form: Key into ``libcudnn.cudnnTensorFormat`` selecting the layout.

    Returns:
        An ``Auto`` object constructed from the descriptor and
        ``libcudnn.cudnnDestroyTensorDescriptor``.
    """
    if len(x.shape) >= 1:
        batch = x.shape[0]
    else:
        batch = 1
    channels = x.size // (batch * h * w)

    handle = libcudnn.cudnnCreateTensorDescriptor()
    layout = libcudnn.cudnnTensorFormat[form]
    libcudnn.cudnnSetTensor4dDescriptor(
        handle, layout, _dtypes[x.dtype], batch, channels, h, w)
    return Auto(handle, libcudnn.cudnnDestroyTensorDescriptor)
Пример #4
0
def get_tensor_desc(x, h, w, form='CUDNN_TENSOR_NCHW'):
    """Create a 4-d tensor descriptor that views ``x`` as (n, c, h, w).

    Args:
        x: Array providing ``shape``, ``size`` and ``dtype``.
        h: Height passed to the descriptor.
        w: Width passed to the descriptor.
        form: Key into ``libcudnn.cudnnTensorFormat`` selecting the layout.

    Returns:
        An ``Auto`` object constructed from the descriptor and
        ``libcudnn.cudnnDestroyTensorDescriptor``.
    """
    # A 0-dimensional array has an empty shape, so indexing shape[0] would
    # raise IndexError; treat it as a batch of one instead.
    n = x.shape[0] if len(x.shape) >= 1 else 1
    # The channel count is whatever remains once n, h and w are factored out.
    c = x.size // (n * h * w)
    desc = libcudnn.cudnnCreateTensorDescriptor()
    libcudnn.cudnnSetTensor4dDescriptor(
        desc, libcudnn.cudnnTensorFormat[form], _dtypes[x.dtype], n, c, h, w)
    return Auto(desc, libcudnn.cudnnDestroyTensorDescriptor)
Пример #5
0
    def __init__(self, shape, dtype,
                 fmt=libcudnn.cudnnTensorFormat['CUDNN_TENSOR_NCHW']):
        """Create a 4-d cuDNN tensor descriptor and keep it in ``self.ptr``.

        Args:
            shape: Indexable whose entries 0..3 are passed, in order, as the
                four dimensions of the descriptor.
            dtype: Data type value forwarded to the descriptor unchanged.
            fmt: Tensor format value forwarded to the descriptor; defaults
                to the ``CUDNN_TENSOR_NCHW`` entry of
                ``libcudnn.cudnnTensorFormat``.
        """
        descriptor = libcudnn.cudnnCreateTensorDescriptor()
        self.ptr = descriptor

        # Only the first four entries of ``shape`` are read.
        dims = [shape[axis] for axis in range(4)]
        libcudnn.cudnnSetTensor4dDescriptor(descriptor, fmt, dtype, *dims)
Пример #6
0
def benchmark_conv(kw, kh, bsz):
    """Time the cuDNN forward convolution for one filter size and batch size.

    A random float32 input of shape ``(bsz, 3, 224, 224)`` is convolved with
    a random float32 filter bank of shape ``(64, 3, kh, kw)`` using padding 3
    and stride 1 in both directions. One untimed warm-up call is made, then
    ten further calls are timed between two ``drv.Event`` records.

    Args:
        kw: Filter width.
        kh: Filter height.
        bsz: Batch size (number of input images).

    Returns:
        ``end.time_since(start)`` divided by the number of timed calls, i.e.
        the average time of one forward convolution (presumably in
        milliseconds -- confirm against the ``drv.Event`` documentation).
    """
    n_runs = 10  # number of timed forward passes

    start, end = (drv.Event(), drv.Event())

    n_input = bsz
    filters_in = 3
    filters_out = 64
    height_in = 224
    width_in = 224
    height_filter = kh
    width_filter = kw
    pad_h = 3
    pad_w = 3
    vertical_stride = 1
    horizontal_stride = 1
    upscalex = 1
    upscaley = 1
    alpha = 1.0
    beta = 1.0

    # Input tensor
    X = gpuarray.to_gpu(
        np.random.rand(n_input, filters_in, height_in, width_in)
        .astype(np.float32))

    # Filter tensor
    filters = gpuarray.to_gpu(
        np.random.rand(filters_out, filters_in, height_filter, width_filter)
        .astype(np.float32))

    # Start from None so the cleanup below can tell which descriptors were
    # actually created if something fails part-way through the setup.
    X_desc = Y_desc = filters_desc = conv_desc = None
    ws_ptr = None
    try:
        # Descriptor for input
        X_desc = libcudnn.cudnnCreateTensorDescriptor()
        libcudnn.cudnnSetTensor4dDescriptor(
            X_desc, tensor_format, data_type,
            n_input, filters_in, height_in, width_in)

        # Filter descriptor
        filters_desc = libcudnn.cudnnCreateFilterDescriptor()
        libcudnn.cudnnSetFilter4dDescriptor(
            filters_desc, data_type, filters_out,
            filters_in, height_filter, width_filter)

        # Convolution descriptor
        conv_desc = libcudnn.cudnnCreateConvolutionDescriptor()
        libcudnn.cudnnSetConvolution2dDescriptor(
            conv_desc, pad_h, pad_w,
            vertical_stride, horizontal_stride, upscalex, upscaley,
            convolution_mode)

        # Get output dimensions (first two values are n_input and
        # filters_out)
        _, _, height_output, width_output = \
            libcudnn.cudnnGetConvolution2dForwardOutputDim(
                conv_desc, X_desc, filters_desc)

        # Output tensor and its descriptor. The descriptor must be bound to
        # ``Y_desc`` (the name used by every call below) and created through
        # the correctly-cased wrapper functions.
        Y = gpuarray.empty(
            (n_input, filters_out, height_output, width_output), np.float32)
        Y_desc = libcudnn.cudnnCreateTensorDescriptor()
        libcudnn.cudnnSetTensor4dDescriptor(
            Y_desc, tensor_format, data_type, n_input,
            filters_out, height_output, width_output)

        # Get pointers to GPU memory
        X_data = ctypes.c_void_p(int(X.gpudata))
        filters_data = ctypes.c_void_p(int(filters.gpudata))
        Y_data = ctypes.c_void_p(int(Y.gpudata))

        # Choose the forward algorithm and allocate its workspace, if any.
        algo = libcudnn.cudnnGetConvolutionForwardAlgorithm(
            cudnn_context, X_desc, filters_desc, conv_desc, Y_desc,
            convolution_fwd_pref, 0)

        ws_size = libcudnn.cudnnGetConvolutionForwardWorkspaceSize(
            cudnn_context, X_desc, filters_desc, conv_desc, Y_desc, algo)
        # A zero-sized workspace is passed as a null pointer.
        ws_ptr = drv.mem_alloc(ws_size.value) if ws_size.value > 0 else 0
        ws_data = ctypes.c_void_p(int(ws_ptr))

        # Untimed warm-up call before the measurement starts.
        libcudnn.cudnnConvolutionForward(
            cudnn_context, alpha, X_desc, X_data,
            filters_desc, filters_data, conv_desc, algo, ws_data,
            ws_size.value, beta, Y_desc, Y_data)

        start.record()
        for _ in range(n_runs):
            libcudnn.cudnnConvolutionForward(
                cudnn_context, alpha, X_desc, X_data,
                filters_desc, filters_data, conv_desc, algo, ws_data,
                ws_size.value, beta, Y_desc, Y_data)
        end.record()
        end.synchronize()
        ms = end.time_since(start)
    finally:
        # Drop the workspace reference and destroy whichever descriptors
        # exist, on both the success and the failure path.
        ws_ptr = None
        if X_desc is not None:
            libcudnn.cudnnDestroyTensorDescriptor(X_desc)
        if Y_desc is not None:
            libcudnn.cudnnDestroyTensorDescriptor(Y_desc)
        if filters_desc is not None:
            libcudnn.cudnnDestroyFilterDescriptor(filters_desc)
        if conv_desc is not None:
            libcudnn.cudnnDestroyConvolutionDescriptor(conv_desc)

    return ms / n_runs
Пример #7
0
# RNN configuration. inputmode/direction/mode/datatype are raw integers
# passed straight to cudnnSetRNNDescriptor below; which enum members the
# zeros select is not visible here -- confirm against the cuDNN enums.
numlayers = 2
inputmode = 0
direction = 0
mode = 0
datatype = 0

handle = libcudnn.cudnnCreate()

rnndesc = libcudnn.cudnnCreateRNNDescriptor()
dropoutdesc = libcudnn.cudnnCreateDropoutDescriptor()
# NOTE(review): unlike every other cuDNN call in this script, this one is not
# qualified with ``libcudnn.`` -- confirm the name is imported directly.
cudnnSetDropoutDescriptor(dropoutdesc, handle, 0, 0, 0, 0)
libcudnn.cudnnSetRNNDescriptor(rnndesc, hiddensize, seqlength, numlayers,
                               dropoutdesc, inputmode, direction, mode,
                               datatype)

# One input descriptor per sequence step, all configured identically.
xdescs = [libcudnn.cudnnCreateTensorDescriptor() for _ in xrange(seqlength)]
for xdesc in xdescs:
    libcudnn.cudnnSetTensorNdDescriptor(xdesc, 0, 3,
                                        [inputsize, minibatch, seqlength])

# Hidden-state and cell-state descriptors share the same dimensions.
hxdesc = libcudnn.cudnnCreateTensorDescriptor()
libcudnn.cudnnSetTensorNdDescriptor(hxdesc, 0, 3,
                                    [hiddensize, minibatch, numlayers])

cxdesc = libcudnn.cudnnCreateTensorDescriptor()
libcudnn.cudnnSetTensorNdDescriptor(cxdesc, 0, 3,
                                    [hiddensize, minibatch, numlayers])

paramssize = libcudnn.cudnnGetRNNParamsSize(handle, rnndesc, xdescs)
Пример #8
0
# upscaley is passed to the convolution descriptor below. alpha and beta are
# not used in the visible part of this script -- presumably scaling factors
# for a later convolution call; confirm against the rest of the file.
upscaley = 1
alpha = 1.0
beta = 1.0

# Input tensor: random float32 values of shape
# (n_input, filters_in, height_in, width_in), passed through gpuarray.to_gpu
X = gpuarray.to_gpu(
    np.random.rand(n_input, filters_in, height_in,
                   width_in).astype(np.float32))

# Filter tensor: random float32 values of shape
# (filters_out, filters_in, height_filter, width_filter)
filters = gpuarray.to_gpu(
    np.random.rand(filters_out, filters_in, height_filter,
                   width_filter).astype(np.float32))

# Descriptor for input, using the same four dimensions as X
X_desc = libcudnn.cudnnCreateTensorDescriptor()
libcudnn.cudnnSetTensor4dDescriptor(X_desc, tensor_format, data_type, n_input,
                                    filters_in, height_in, width_in)

# Filter descriptor, using the same four dimensions as filters
filters_desc = libcudnn.cudnnCreateFilterDescriptor()
libcudnn.cudnnSetFilter4dDescriptor(filters_desc, data_type, tensor_format,
                                    filters_out, filters_in, height_filter,
                                    width_filter)

# Convolution descriptor: padding, strides, upscale factors, mode, data type
conv_desc = libcudnn.cudnnCreateConvolutionDescriptor()
libcudnn.cudnnSetConvolution2dDescriptor(conv_desc, pad_h, pad_w,
                                         vertical_stride, horizontal_stride,
                                         upscalex, upscaley, convolution_mode,
                                         data_type)
Пример #9
0
    start.record()

def end_bench(op):
    """Stop the timer and print the mean time and throughput for ``op``.

    Records and synchronizes the module-level ``end`` event, divides the
    time elapsed since ``start`` by ``repeat``, and prints that value
    together with a gflops figure computed from ``conv.flops``.

    Args:
        op: Label for the benchmarked operation, included in the output.
    """
    end.record()
    end.synchronize()
    msecs  = end.time_since(start) / repeat
    gflops = conv.flops / (msecs * 1000000.0)
    # The parenthesized single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("%7.3f msecs %8.3f gflops (%s: %s)" % (msecs, gflops, op, conv))

# NervanaGPU instance with stochastic rounding off and bench=True
ng = NervanaGPU(stochastic_round=False, bench=True)

# Create a cuDNN context
cudnn = libcudnn.cudnnCreate()

# Allocate the descriptors: one convolution, four tensor and two filter
# descriptors. They are only created here; their dimensions are set outside
# this fragment. (The single-letter prefixes presumably name the tensors they
# describe -- confirm against the code that configures them.)
C_desc = libcudnn.cudnnCreateConvolutionDescriptor()
I_desc = libcudnn.cudnnCreateTensorDescriptor()
O_desc = libcudnn.cudnnCreateTensorDescriptor()
E_desc = libcudnn.cudnnCreateTensorDescriptor()
B_desc = libcudnn.cudnnCreateTensorDescriptor()
F_desc = libcudnn.cudnnCreateFilterDescriptor()
U_desc = libcudnn.cudnnCreateFilterDescriptor()

# Look up the cuDNN option values used by the benchmark: NCHW layout, float
# data, cross-correlation mode and the "no workspace" forward preference.
NCHW_fmt  = libcudnn.cudnnTensorFormat['CUDNN_TENSOR_NCHW']
cu_dtype  = libcudnn.cudnnDataType['CUDNN_DATA_FLOAT']
conv_mode = libcudnn.cudnnConvolutionMode['CUDNN_CROSS_CORRELATION']
fwd_pref  = libcudnn.cudnnConvolutionFwdPreference['CUDNN_CONVOLUTION_FWD_NO_WORKSPACE']
# Forward-preference keys noted by the original author:
# CUDNN_CONVOLUTION_FWD_NO_WORKSPACE
# CUDNN_CONVOLUTION_FWD_PREFER_FASTEST

                # N    C   K  D    H   W  T  R  S   pad    str
Пример #10
0
    def __init__(self, shape, dtype, fmt=libcudnn.cudnnTensorFormat['CUDNN_TENSOR_NCHW']):
        """Allocate a cuDNN tensor descriptor and configure it as 4-d.

        The descriptor is stored in ``self.ptr``. ``fmt`` and ``dtype`` are
        forwarded unchanged, followed by entries 0..3 of ``shape`` as the
        four dimensions.
        """
        self.ptr = libcudnn.cudnnCreateTensorDescriptor()

        # Only the first four entries of ``shape`` are read.
        first, second, third, fourth = (shape[0], shape[1], shape[2], shape[3])
        libcudnn.cudnnSetTensor4dDescriptor(
            self.ptr, fmt, dtype, first, second, third, fourth)
Пример #11
0
# Stride and upscale values are passed to the convolution descriptor below.
# alpha and beta are not used in the visible part of this script --
# presumably scaling factors for a later convolution call; confirm.
horizontal_stride = 1
upscalex = 1
upscaley = 1
alpha = 1.0
beta = 1.0

# Input tensor: random float32 values of shape
# (n_input, filters_in, height_in, width_in), passed through gpuarray.to_gpu
X = gpuarray.to_gpu(np.random.rand(n_input, filters_in, height_in, width_in)
    .astype(np.float32))

# Filter tensor: random float32 values of shape
# (filters_out, filters_in, height_filter, width_filter)
filters = gpuarray.to_gpu(np.random.rand(filters_out,
    filters_in, height_filter, width_filter).astype(np.float32))

# Descriptor for input, using the same four dimensions as X
X_desc = libcudnn.cudnnCreateTensorDescriptor()
libcudnn.cudnnSetTensor4dDescriptor(X_desc, tensor_format, data_type,
    n_input, filters_in, height_in, width_in)

# Filter descriptor, using the same four dimensions as filters
filters_desc = libcudnn.cudnnCreateFilterDescriptor()
libcudnn.cudnnSetFilter4dDescriptor(filters_desc, data_type, filters_out,
    filters_in, height_filter, width_filter)

# Convolution descriptor: padding, strides, upscale factors and mode
conv_desc = libcudnn.cudnnCreateConvolutionDescriptor()
libcudnn.cudnnSetConvolution2dDescriptor(conv_desc, pad_h, pad_w,
    vertical_stride, horizontal_stride, upscalex, upscaley,
    convolution_mode)

# Get output dimensions (first two values are n_input and filters_out)
Пример #12
0
def end_bench(op):
    """Finish a timed run and report its mean duration and throughput.

    Records and synchronizes the module-level ``end`` event, divides the
    time elapsed since ``start`` by ``repeat``, and prints the result along
    with a gflops figure computed from ``conv.flops``.

    Args:
        op: Label for the benchmarked operation, included in the output.
    """
    end.record()
    end.synchronize()
    msecs = end.time_since(start) / repeat
    gflops = conv.flops / (msecs * 1000000.0)
    # The parenthesized single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("%7.3f msecs %8.3f gflops (%s: %s)" % (msecs, gflops, op, conv))


# NervanaGPU instance with stochastic rounding off and bench=True
ng = NervanaGPU(stochastic_round=False, bench=True)

# Create a cuDNN context
cudnn = libcudnn.cudnnCreate()

# Allocate the descriptors: one convolution, four tensor and two filter
# descriptors. They are only created here; their dimensions are set outside
# this fragment. (The single-letter prefixes presumably name the tensors they
# describe -- confirm against the code that configures them.)
C_desc = libcudnn.cudnnCreateConvolutionDescriptor()
I_desc = libcudnn.cudnnCreateTensorDescriptor()
O_desc = libcudnn.cudnnCreateTensorDescriptor()
E_desc = libcudnn.cudnnCreateTensorDescriptor()
B_desc = libcudnn.cudnnCreateTensorDescriptor()
F_desc = libcudnn.cudnnCreateFilterDescriptor()
U_desc = libcudnn.cudnnCreateFilterDescriptor()

# Look up the cuDNN option values used by the benchmark: NCHW layout, float
# data, cross-correlation mode and the "no workspace" forward preference.
NCHW_fmt = libcudnn.cudnnTensorFormat['CUDNN_TENSOR_NCHW']
cu_dtype = libcudnn.cudnnDataType['CUDNN_DATA_FLOAT']
conv_mode = libcudnn.cudnnConvolutionMode['CUDNN_CROSS_CORRELATION']
fwd_pref = libcudnn.cudnnConvolutionFwdPreference[
    'CUDNN_CONVOLUTION_FWD_NO_WORKSPACE']
# Forward-preference keys noted by the original author:
# CUDNN_CONVOLUTION_FWD_NO_WORKSPACE
# CUDNN_CONVOLUTION_FWD_PREFER_FASTEST
Пример #13
0
# RNN configuration. inputmode/direction/mode/datatype are raw integers
# passed straight to cudnnSetRNNDescriptor below; which enum members the
# zeros select is not visible here -- confirm against the cuDNN enums.
numlayers = 2
inputmode = 0
direction = 0
mode = 0
datatype = 0

handle = libcudnn.cudnnCreate()

rnndesc = libcudnn.cudnnCreateRNNDescriptor()
dropoutdesc = libcudnn.cudnnCreateDropoutDescriptor()
# NOTE(review): unlike every other cuDNN call in this script, this one is not
# qualified with ``libcudnn.`` -- confirm the name is imported directly.
cudnnSetDropoutDescriptor(dropoutdesc, handle, 0, 0, 0, 0)
libcudnn.cudnnSetRNNDescriptor(rnndesc, hiddensize, seqlength, numlayers,
                               dropoutdesc, inputmode, direction, mode,
                               datatype)


# One input descriptor per sequence step, all configured identically.
xdescs = [libcudnn.cudnnCreateTensorDescriptor() for _ in xrange(seqlength)]
for xdesc in xdescs:
    libcudnn.cudnnSetTensorNdDescriptor(
        xdesc, 0, 3, [inputsize, minibatch, seqlength])

# Hidden-state and cell-state descriptors share the same dimensions.
hxdesc = libcudnn.cudnnCreateTensorDescriptor()
libcudnn.cudnnSetTensorNdDescriptor(
    hxdesc, 0, 3, [hiddensize, minibatch, numlayers])

cxdesc = libcudnn.cudnnCreateTensorDescriptor()
libcudnn.cudnnSetTensorNdDescriptor(
    cxdesc, 0, 3, [hiddensize, minibatch, numlayers])

paramssize = libcudnn.cudnnGetRNNParamsSize(handle, rnndesc, xdescs)

# The weights are described by a filter descriptor with dimensions
# (paramssize, 1, 1).
wdesc = libcudnn.cudnnCreateFilterDescriptor()
libcudnn.cudnnSetFilterNdDescriptor(wdesc, 0, 0, 3, [paramssize, 1, 1])

# One output descriptor per sequence step, all configured identically.
ydescs = [libcudnn.cudnnCreateTensorDescriptor() for _ in xrange(seqlength)]
for ydesc in ydescs:
    libcudnn.cudnnSetTensorNdDescriptor(
        ydesc, 0, 3, [hiddensize, minibatch, seqlength])