def __init__(self, config, name="Convolution"):
    """Load the layer's filter weights and bias and build cuDNN descriptors.

    Args:
        config: Layer configuration; forwarded to the base initializer and
            to ``self.load_tensor``.
        name: Layer name forwarded to the base initializer.

    NOTE(review): ``self.kH``, ``self.kW``, ``self.padH``, ``self.padW``,
    ``self.dH``, ``self.dW`` and ``self.convolution_mode`` are read below
    but never assigned in this method -- presumably the base initializer
    sets them from ``config``; confirm.
    """
    super().__init__(config, name)

    self.output = None
    # Tensor 0 of the config holds the filter weights.
    self.W = self.load_tensor(config, 0)
    self.alpha = 1.0
    self.beta = 0.0
    # Input/output descriptors are not known yet; they start out unset.
    self.in_desc = None
    self.out_desc = None

    # The first two axes of W give the filter-map and channel counts.
    self.num_filter_maps = self.W.shape[0]
    self.num_filter_channels = self.W.shape[1]

    # Tensor 1 is the bias, loaded with one value per filter map in a
    # 4-d (1, maps, 1, 1) shape.
    self.bias = self.load_tensor(
        config, 1, shape=(1, self.num_filter_maps, 1, 1))
    self.b_desc = self.bias.get_cudnn_tensor_desc()

    # Filter descriptor: dtype plus (maps, channels, kH, kW).
    self.filt_desc = libcudnn.cudnnCreateFilterDescriptor()
    filt_dtype = gputensor.np_2_cudnn_dtype[self.W.dtype]
    print("FILT:", self.W.dtype, filt_dtype)
    print("FILT:", self.W.shape, self.num_filter_maps,
          self.num_filter_channels, self.kH, self.kW)
    libcudnn.cudnnSetFilter4dDescriptor(
        self.filt_desc, filt_dtype,
        self.num_filter_maps, self.num_filter_channels,
        self.kH, self.kW)

    # Convolution descriptor: padding, stride, two literal 1s, and the mode.
    # NOTE(review): the meaning of the two 1s depends on the libcudnn
    # wrapper version in use (upscale vs. dilation) -- confirm.
    self.conv_desc = libcudnn.cudnnCreateConvolutionDescriptor()
    libcudnn.cudnnSetConvolution2dDescriptor(
        self.conv_desc,
        self.padH, self.padW,
        self.dH, self.dW,
        1, 1,
        self.convolution_mode)
def get_conv2d_desc(pad, stride, mode=_default_conv_mode):
    """Create a 2d convolution descriptor.

    Args:
        pad: Indexable; items 0 and 1 are passed as the two padding
            arguments, in that order.
        stride: Indexable; items 0 and 1 are passed as the two stride
            arguments, in that order.
        mode: Convolution mode handed to cuDNN unchanged.

    Returns:
        ``Auto(desc, libcudnn.cudnnDestroyConvolutionDescriptor)`` for the
        configured descriptor.

    Raises:
        Whatever indexing ``pad``/``stride`` or the libcudnn calls raise;
        the exception is propagated unchanged.
    """
    # Read the geometry before allocating anything, so a malformed
    # pad/stride fails without a descriptor having been created.
    pad_0, pad_1 = pad[0], pad[1]
    stride_0, stride_1 = stride[0], stride[1]

    desc = libcudnn.cudnnCreateConvolutionDescriptor()
    try:
        libcudnn.cudnnSetConvolution2dDescriptor(
            desc, pad_0, pad_1, stride_0, stride_1, 1, 1, mode)
    except Exception:
        # The descriptor has not been handed to Auto yet, so nothing else
        # would release it if configuration fails.
        libcudnn.cudnnDestroyConvolutionDescriptor(desc)
        raise
    return Auto(desc, libcudnn.cudnnDestroyConvolutionDescriptor)
def get_conv2d_desc(pad, stride, mode=_default_conv_mode):
    """Create a 2d convolution descriptor.

    NOTE(review): this file contains more than one definition of
    ``get_conv2d_desc`` with the same signature; the last one executed is
    the one callers get. Consider keeping a single copy.

    Args:
        pad: Indexable; items 0 and 1 are passed as the two padding
            arguments, in that order.
        stride: Indexable; items 0 and 1 are passed as the two stride
            arguments, in that order.
        mode: Convolution mode handed to cuDNN unchanged.

    Returns:
        ``Auto(desc, libcudnn.cudnnDestroyConvolutionDescriptor)`` for the
        configured descriptor.

    Raises:
        Whatever indexing ``pad``/``stride`` or the libcudnn calls raise;
        the exception is propagated unchanged.
    """
    # Read the geometry before allocating anything, so a malformed
    # pad/stride fails without a descriptor having been created.
    pad_0, pad_1 = pad[0], pad[1]
    stride_0, stride_1 = stride[0], stride[1]

    desc = libcudnn.cudnnCreateConvolutionDescriptor()
    try:
        libcudnn.cudnnSetConvolution2dDescriptor(
            desc, pad_0, pad_1, stride_0, stride_1, 1, 1, mode)
    except Exception:
        # The descriptor has not been handed to Auto yet, so nothing else
        # would release it if configuration fails.
        libcudnn.cudnnDestroyConvolutionDescriptor(desc)
        raise
    return Auto(desc, libcudnn.cudnnDestroyConvolutionDescriptor)
np.random.rand(filters_out, filters_in, height_filter, width_filter).astype(np.float32)) # Descriptor for input X_desc = libcudnn.cudnnCreateTensorDescriptor() libcudnn.cudnnSetTensor4dDescriptor(X_desc, tensor_format, data_type, n_input, filters_in, height_in, width_in) # Filter descriptor filters_desc = libcudnn.cudnnCreateFilterDescriptor() libcudnn.cudnnSetFilter4dDescriptor(filters_desc, data_type, tensor_format, filters_out, filters_in, height_filter, width_filter) # Convolution descriptor conv_desc = libcudnn.cudnnCreateConvolutionDescriptor() libcudnn.cudnnSetConvolution2dDescriptor(conv_desc, pad_h, pad_w, vertical_stride, horizontal_stride, upscalex, upscaley, convolution_mode, data_type) # Get output dimensions (first two values are n_input and filters_out) _, _, height_output, width_output = libcudnn.cudnnGetConvolution2dForwardOutputDim( conv_desc, X_desc, filters_desc) # Output tensor Y = gpuarray.empty((n_input, filters_out, height_output, width_output), np.float32) Y_desc = libcudnn.cudnnCreateTensorDescriptor() libcudnn.cudnnSetTensor4dDescriptor(Y_desc, tensor_format, data_type, n_input, filters_out, height_output, width_output)
def start_bench():
    """Record the module-level ``start`` event to begin a timed section."""
    start.record()


def end_bench(op):
    """Close a timed section and print its per-repetition time and GFLOPS.

    Records and synchronizes the module-level ``end`` event, divides the
    time elapsed since ``start`` by ``repeat``, and derives GFLOPS from
    ``conv.flops``. ``start``, ``end``, ``repeat`` and ``conv`` are
    module-level names defined outside this excerpt.

    Args:
        op: Label for the operation being timed; included in the output.
    """
    end.record()
    end.synchronize()
    msecs = end.time_since(start) / repeat
    # flops / (ms * 1e6) == flops / (s * 1e9), i.e. GFLOPS.
    gflops = conv.flops / (msecs * 1000000.0)
    # Single-argument parenthesised form prints the same text on both
    # Python 2 and Python 3.
    print("%7.3f msecs %8.3f gflops (%s: %s)" % (msecs, gflops, op, conv))


ng = NervanaGPU(stochastic_round=False, bench=True)

# Create a cuDNN context
cudnn = libcudnn.cudnnCreate()

# One convolution descriptor, four tensor descriptors and two filter
# descriptors; all are created empty here.
C_desc = libcudnn.cudnnCreateConvolutionDescriptor()
I_desc = libcudnn.cudnnCreateTensorDescriptor()
O_desc = libcudnn.cudnnCreateTensorDescriptor()
E_desc = libcudnn.cudnnCreateTensorDescriptor()
B_desc = libcudnn.cudnnCreateTensorDescriptor()
F_desc = libcudnn.cudnnCreateFilterDescriptor()
U_desc = libcudnn.cudnnCreateFilterDescriptor()

# Set some options and tensor dimensions
NCHW_fmt = libcudnn.cudnnTensorFormat['CUDNN_TENSOR_NCHW']
cu_dtype = libcudnn.cudnnDataType['CUDNN_DATA_FLOAT']
conv_mode = libcudnn.cudnnConvolutionMode['CUDNN_CROSS_CORRELATION']
fwd_pref = libcudnn.cudnnConvolutionFwdPreference['CUDNN_CONVOLUTION_FWD_NO_WORKSPACE']
# Forward-preference keys noted by the original author:
# CUDNN_CONVOLUTION_FWD_NO_WORKSPACE
# CUDNN_CONVOLUTION_FWD_PREFER_FASTEST