Exemplo n.º 1
0
    def __init__(self, config, name="Convolution"):
        """Load the layer's weights and bias and build its cuDNN descriptors.

        Args:
            config: Layer configuration; forwarded to the base-class
                initializer and to ``load_tensor``.
            name: Layer name forwarded to the base-class initializer.

        Note:
            ``kH``, ``kW``, ``padH``, ``padW``, ``dH``, ``dW`` and
            ``convolution_mode`` are read from ``self`` but not assigned in
            this method -- presumably the base-class initializer sets them
            from ``config``; verify against the base class.
        """
        super().__init__(config, name)
        self.output = None

        # Tensor 0 of the config is used as the filter weights.
        self.W = self.load_tensor(config, 0)

        # Scaling factors kept on the instance (used outside this method).
        self.alpha, self.beta = 1.0, 0.0

        # Input/output descriptors are not known yet at construction time.
        self.in_desc = self.out_desc = None

        # The first two weight dimensions give the filter-map and
        # filter-channel counts.
        filter_shape = self.W.shape
        self.num_filter_maps = filter_shape[0]
        self.num_filter_channels = filter_shape[1]

        # Tensor 1 is the bias, loaded directly in a
        # (1, num_filter_maps, 1, 1) shape.
        bias_shape = (1, self.num_filter_maps, 1, 1)
        self.bias = self.load_tensor(config, 1, shape=bias_shape)
        self.b_desc = self.bias.get_cudnn_tensor_desc()

        # Filter descriptor: dtype is mapped from the weights' numpy dtype.
        self.filt_desc = libcudnn.cudnnCreateFilterDescriptor()
        filt_dtype = gputensor.np_2_cudnn_dtype[self.W.dtype]
        print("FILT:", self.W.dtype, filt_dtype)
        print("FILT:", filter_shape, self.num_filter_maps, self.num_filter_channels, self.kH, self.kW)
        libcudnn.cudnnSetFilter4dDescriptor(
            self.filt_desc,
            filt_dtype,
            self.num_filter_maps,
            self.num_filter_channels,
            self.kH,
            self.kW)

        # Convolution descriptor: padding, stride, then two constant 1s
        # (NOTE(review): upscale or dilation factors depending on the
        # libcudnn wrapper version -- confirm) and the convolution mode.
        self.conv_desc = libcudnn.cudnnCreateConvolutionDescriptor()
        libcudnn.cudnnSetConvolution2dDescriptor(
            self.conv_desc,
            self.padH, self.padW,
            self.dH, self.dW,
            1, 1,
            self.convolution_mode)
Exemplo n.º 2
0
def get_conv2d_desc(pad, stride, mode=_default_conv_mode):
    """Create a 2d convolution descriptor.

    Args:
        pad: Indexable whose items 0 and 1 are passed as the two padding
            arguments of ``cudnnSetConvolution2dDescriptor``.
        stride: Indexable whose items 0 and 1 are passed as the two stride
            arguments.
        mode: Convolution mode passed through to cuDNN.

    Returns:
        The configured descriptor wrapped in ``Auto`` together with
        ``cudnnDestroyConvolutionDescriptor``.

    Raises:
        Whatever indexing ``pad``/``stride`` or the libcudnn calls raise; the
        descriptor is destroyed before the exception propagates.
    """
    # Read the arguments before allocating anything, so malformed input
    # cannot leave an orphaned descriptor behind.
    pad_h, pad_w = pad[0], pad[1]
    stride_h, stride_w = stride[0], stride[1]

    desc = libcudnn.cudnnCreateConvolutionDescriptor()
    try:
        libcudnn.cudnnSetConvolution2dDescriptor(
            desc, pad_h, pad_w, stride_h, stride_w, 1, 1, mode)
    except Exception:
        # Auto has not taken ownership yet, so release the raw descriptor
        # here to avoid leaking it.
        libcudnn.cudnnDestroyConvolutionDescriptor(desc)
        raise
    return Auto(desc, libcudnn.cudnnDestroyConvolutionDescriptor)
Exemplo n.º 3
0
def get_conv2d_desc(pad, stride, mode=_default_conv_mode):
    """Create a 2d convolution descriptor.

    Args:
        pad: Indexable whose items 0 and 1 are passed as the two padding
            arguments of ``cudnnSetConvolution2dDescriptor``.
        stride: Indexable whose items 0 and 1 are passed as the two stride
            arguments.
        mode: Convolution mode passed through to cuDNN.

    Returns:
        The configured descriptor wrapped in ``Auto`` together with
        ``cudnnDestroyConvolutionDescriptor``.

    Raises:
        Whatever indexing ``pad``/``stride`` or the libcudnn calls raise; the
        descriptor is destroyed before the exception propagates.
    """
    # Pull the values out first: a bad ``pad``/``stride`` then fails before
    # any descriptor has been allocated.
    pad_h, pad_w = pad[0], pad[1]
    stride_h, stride_w = stride[0], stride[1]

    desc = libcudnn.cudnnCreateConvolutionDescriptor()
    try:
        libcudnn.cudnnSetConvolution2dDescriptor(
            desc, pad_h, pad_w, stride_h, stride_w, 1, 1, mode)
    except Exception:
        # Nothing owns ``desc`` yet on this path; destroy it explicitly so a
        # failed configuration does not leak the descriptor.
        libcudnn.cudnnDestroyConvolutionDescriptor(desc)
        raise
    return Auto(desc, libcudnn.cudnnDestroyConvolutionDescriptor)
Exemplo n.º 4
0
    np.random.rand(filters_out, filters_in, height_filter,
                   width_filter).astype(np.float32))

# Input tensor descriptor.
X_desc = libcudnn.cudnnCreateTensorDescriptor()
libcudnn.cudnnSetTensor4dDescriptor(
    X_desc, tensor_format, data_type,
    n_input, filters_in, height_in, width_in)

# Filter (weights) descriptor.
filters_desc = libcudnn.cudnnCreateFilterDescriptor()
libcudnn.cudnnSetFilter4dDescriptor(
    filters_desc, data_type, tensor_format,
    filters_out, filters_in, height_filter, width_filter)

# Convolution descriptor: padding, strides, upscale factors, mode, data type.
conv_desc = libcudnn.cudnnCreateConvolutionDescriptor()
libcudnn.cudnnSetConvolution2dDescriptor(
    conv_desc,
    pad_h, pad_w,
    vertical_stride, horizontal_stride,
    upscalex, upscaley,
    convolution_mode, data_type)

# Ask cuDNN for the output shape. The first two returned values (n_input and
# filters_out) are already known, so they are discarded.
_, _, height_output, width_output = (
    libcudnn.cudnnGetConvolution2dForwardOutputDim(
        conv_desc, X_desc, filters_desc))

# Output buffer and its descriptor.
# NOTE(review): the buffer dtype is hard-coded to float32 while the
# descriptor uses ``data_type`` -- confirm the two always agree.
Y = gpuarray.empty(
    (n_input, filters_out, height_output, width_output), np.float32)
Y_desc = libcudnn.cudnnCreateTensorDescriptor()
libcudnn.cudnnSetTensor4dDescriptor(
    Y_desc, tensor_format, data_type,
    n_input, filters_out, height_output, width_output)
Exemplo n.º 5
0
def start_bench():
    """Record the module-level ``start`` event.

    ``end_bench`` later measures the time elapsed since this event.
    """
    start.record()

def end_bench(op):
    """Finish a timed run and print its timing and throughput.

    Records and synchronizes the module-level ``end`` event, divides the time
    elapsed since ``start`` by ``repeat`` (presumably the number of timed
    iterations -- confirm against the caller), and prints that time together
    with the gflops figure derived from ``conv.flops``.

    Args:
        op: Label for the benchmarked operation; formatted with ``%s``.
    """
    end.record()
    end.synchronize()
    msecs = end.time_since(start) / repeat
    # flops / (milliseconds * 1e6) is flops / (seconds * 1e9), i.e. gflops.
    gflops = conv.flops / (msecs * 1000000.0)
    # A single parenthesised argument prints the same text on Python 2 and
    # Python 3; the bare ``print`` statement is a SyntaxError on Python 3.
    print("%7.3f msecs %8.3f gflops (%s: %s)" % (msecs, gflops, op, conv))

# Construct the NervanaGPU backend with stochastic rounding disabled and
# bench=True.
ng = NervanaGPU(stochastic_round=False, bench=True)

# Create a cuDNN context (handle).
cudnn = libcudnn.cudnnCreate()

# Allocate one convolution descriptor, four tensor descriptors and two filter
# descriptors. They are only created here; nothing in this excerpt configures
# them.
# NOTE(review): names look like C=convolution, I=input, O=output, E=error,
# B=backprop, F=filter, U=update -- confirm against the code that uses them.
C_desc = libcudnn.cudnnCreateConvolutionDescriptor()
I_desc = libcudnn.cudnnCreateTensorDescriptor()
O_desc = libcudnn.cudnnCreateTensorDescriptor()
E_desc = libcudnn.cudnnCreateTensorDescriptor()
B_desc = libcudnn.cudnnCreateTensorDescriptor()
F_desc = libcudnn.cudnnCreateFilterDescriptor()
U_desc = libcudnn.cudnnCreateFilterDescriptor()

# Look up the cuDNN enum values by name: NCHW tensor layout, float data type,
# cross-correlation mode, and the forward-algorithm preference.
NCHW_fmt  = libcudnn.cudnnTensorFormat['CUDNN_TENSOR_NCHW']
cu_dtype  = libcudnn.cudnnDataType['CUDNN_DATA_FLOAT']
conv_mode = libcudnn.cudnnConvolutionMode['CUDNN_CROSS_CORRELATION']
fwd_pref  = libcudnn.cudnnConvolutionFwdPreference['CUDNN_CONVOLUTION_FWD_NO_WORKSPACE']
# Candidate keys for fwd_pref:
#   CUDNN_CONVOLUTION_FWD_NO_WORKSPACE
#   CUDNN_CONVOLUTION_FWD_PREFER_FASTEST