def lmul(self, x):
    """
    dot(x, A) aka, do convolution with input image x
    """
    check_cuda(str(type(self)) + ".lmul")

    # TODO: figure out why this sometimes receives a CPU tensor.
    cpu = 'Cuda' not in str(type(x))
    if cpu:
        x = gpu_from_host(x)

    assert x.ndim == 5
    x_axes = self.input_axes
    assert len(x_axes) == 5

    op_axes = ('c', 0, 1, 't', 'b')
    if tuple(x_axes) != op_axes:
        x = x.dimshuffle(*[x_axes.index(axis) for axis in op_axes])

    # Fold the time axis into the batch axis so the 2D FilterActs op
    # can convolve every frame at once.
    _x_4d_shape = (self.signal_shape[0],
                   self.signal_shape[1],
                   self.signal_shape[2],
                   self.signal_shape[3] * self.signal_shape[4])
    x = x.reshape(_x_4d_shape)
    x = gpu_contiguous(x)

    rval = FilterActs(self.pad, self.partial_sum,
                      self.kernel_stride[0])(x, self._filters)

    if cpu:
        rval = host_from_gpu(rval)

    # Unfold so the two time-like axes are explicit again.
    rval = rval.reshape((self.filter_shape[3],
                         self.filter_shape[4],
                         rval.shape[1],
                         rval.shape[2],
                         self.signal_shape[3],
                         self.signal_shape[4]))

    # Finish the convolution over time by reducing along the diagonal
    # of axes 4 and 0 (the conv3d2d trick).
    rval = diagonal_subtensor(rval, 4, 0).sum(axis=0)

    # Format the output based on the output space
    rval_axes = self.output_axes
    assert len(rval_axes) == 5
    if tuple(rval_axes) != op_axes:
        rval = rval.dimshuffle(*[op_axes.index(axis) for axis in rval_axes])

    return rval
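# -- Illustration (not pylearn2 code) ------------------------------------
# The diagonal_subtensor(...).sum(axis=0) step above is the usual trick
# for assembling a temporal (3D) convolution out of per-frame 2D
# convolutions, as in Theano's conv3d2d. A minimal numpy sketch of just
# that reduction, with each frame's conv2d output collapsed to a scalar;
# all names here are illustrative:

def _demo_diagonal_time_reduction():
    import numpy as np
    T, K = 6, 3                     # signal frames, kernel frames
    x = np.random.randn(T)          # stand-in: per-frame conv2d responses
    w = np.random.randn(K)          # stand-in: per-slice filter responses
    r = np.outer(w, x)              # r[k, t] = response of frame t to slice k
    # Summing r[k, t + k] over k is the diagonal reduction; it equals
    # the 'valid' temporal correlation of x with w.
    out = np.array([sum(r[k, t + k] for k in range(K))
                    for t in range(T - K + 1)])
    assert np.allclose(out, np.correlate(x, w, mode='valid'))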
def lmul(self, x):
    """
    dot(x, A) aka, do convolution with input image x
    """
    check_cuda(str(type(self)) + ".lmul")

    cpu = 'Cuda' not in str(type(x))
    assert cpu
    if cpu:
        x = gpu_from_host(x)

    assert x.ndim == 5
    x_axes = self.input_axes
    assert len(x_axes) == 5

    op_axes = ('b', 0, 1, 't', 'c')
    if tuple(x_axes) != op_axes:
        x = x.dimshuffle(*[x_axes.index(axis) for axis in op_axes])

    # conv3d wants ('b', 't', 'c', 0, 1); permute, convolve, permute back.
    im = x.dimshuffle(0, 3, 4, 1, 2)
    filt = self._filters.dimshuffle(0, 3, 4, 1, 2)
    rval = conv3d(im, filt, None, None,
                  (self.kernel_stride[0], self.kernel_stride[1]))
    rval = rval.dimshuffle(0, 3, 4, 1, 2)

    return rval
def lmul(self, x):
    """
    dot(x, A) aka, do convolution with input image x
    """
    check_cuda(str(type(self)) + ".lmul")

    cpu = 'Cuda' not in str(type(x))
    assert cpu
    if cpu:
        x = gpu_from_host(x)

    assert x.ndim == 5
    x_axes = self.input_axes
    assert len(x_axes) == 5

    op_axes = ('b', 0, 1, 't', 'c')
    if tuple(x_axes) != op_axes:
        x = x.dimshuffle(*[x_axes.index(axis) for axis in op_axes])

    rval = self.conv3d_op(x, self._filters, self.b, self.kernel_stride)

    return rval
def convnet_available():
    check_cuda(check_enabled=False)

    # If already compiled, OK
    if convnet_available.compiled:
        _logger.debug('already compiled')
        return True

    # If there was an error, do not try again
    if convnet_available.compile_error:
        _logger.debug('error last time')
        return False

    # Else, we need CUDA
    if not cuda.cuda_available:
        convnet_available.compile_error = True
        _logger.debug('cuda unavailable')
        return False

    # Try to actually compile
    success = convnet_compile()
    if success:
        convnet_available.compiled = True
    else:
        # Remember the failure so we do not retry on every call.
        convnet_available.compile_error = True
    _logger.debug('compilation success: %s', success)

    return convnet_available.compiled
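# -- Illustration (not pylearn2 code) ------------------------------------
# convnet_available() caches its verdict on attributes of the function
# object itself, so the expensive compilation probe runs at most once per
# process. A self-contained sketch of the same pattern; feature_probe and
# feature_available are hypothetical names:

def feature_probe():
    """Stand-in for an expensive capability check (e.g. a test compile)."""
    return True

def feature_available():
    if feature_available.result is None:
        feature_available.result = feature_probe()
    return feature_available.result

feature_available.result = None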
def lmul_T(self, x):
    """
    .. todo::

        WRITEME
    """
    check_cuda(str(type(self)) + ".lmul_T")
    assert x.dtype == self._filters.dtype

    op_axes = ("c", 0, 1, "b")
    axes = self.output_axes
    if tuple(axes) != op_axes:
        x = x.dimshuffle(*[axes.index(ax) for ax in op_axes])

    x = gpu_contiguous(x)

    rval = ImageActs(pad=self.pad, partial_sum=self.partial_sum,
                     stride=self.kernel_stride[0])(x, self._filters)

    # Format the output based on the input space
    axes = self.input_axes
    assert len(axes) == 4

    if tuple(axes) != op_axes:
        rval = rval.dimshuffle(op_axes.index(axes[0]),
                               op_axes.index(axes[1]),
                               op_axes.index(axes[2]),
                               op_axes.index(axes[3]))

    return rval
def lmul(self, x):
    """
    dot(x, A) aka, do convolution with input image x
    """
    check_cuda(str(type(self)) + ".lmul")

    cpu = 'Cuda' not in str(type(x))
    if cpu:
        x = gpu_from_host(x)

    assert x.ndim == 5
    x_axes = self.input_axes
    assert len(x_axes) == 5

    # GpuCorr3dMM expects ('b', 'c', 0, 1, 't'); the input is assumed to
    # already be laid out that way.
    op_axes = ('b', 'c', 0, 1, 't')

    rval = cuda.blas.GpuCorr3dMM(border_mode='valid',
                                 subsample=tuple(self.kernel_stride),
                                 pad=tuple(self.pad))(x, self._filters)

    return rval
def lmul(self, x):
    """
    .. todo::

        WRITEME properly

    dot(x, A) aka, do convolution with input image x
    """
    check_cuda(str(type(self)) + ".lmul")

    cpu = 'Cuda' not in str(type(x))
    if cpu:
        x = gpu_from_host(x)

    # x must be formatted as channel, topo dim 0, topo dim 1, batch_index
    # for use with FilterActs
    assert x.ndim == 4
    x_axes = self.input_axes
    assert len(x_axes) == 4

    op_axes = ('c', 0, 1, 'b')
    if tuple(x_axes) != op_axes:
        x = x.dimshuffle(*[x_axes.index(axis) for axis in op_axes])

    x = gpu_contiguous(x)

    # Patch old pickle files.
    if not hasattr(self, 'kernel_stride'):
        self.kernel_stride = (1, 1)

    rval = FilterActs(self.pad, self.partial_sum, self.kernel_stride[0])(
        x, self._filters)

    # Format the output based on the output space
    rval_axes = self.output_axes
    assert len(rval_axes) == 4

    if cpu:
        rval = host_from_gpu(rval)

    if tuple(rval_axes) != op_axes:
        rval = rval.dimshuffle(*[op_axes.index(axis) for axis in rval_axes])

    return rval
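# -- Illustration (not pylearn2 code) ------------------------------------
# The dimshuffle idiom above builds a permutation by asking, for each axis
# the op wants, where that axis currently lives. The same bookkeeping in
# numpy:

def _demo_axis_permutation():
    import numpy as np
    x_axes = ('b', 'c', 0, 1)                # current layout
    op_axes = ('c', 0, 1, 'b')               # layout FilterActs wants
    perm = [x_axes.index(axis) for axis in op_axes]   # [1, 2, 3, 0]
    x = np.zeros((128, 3, 32, 32))           # ('b', 'c', 0, 1)
    assert x.transpose(perm).shape == (3, 32, 32, 128)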
def lmul(self, x, b):
    """
    dot(x, A) aka, do convolution with input image x
    """
    check_cuda(str(type(self)) + ".lmul")

    cpu = 'Cuda' not in str(type(x))
    assert cpu
    if cpu:
        x = gpu_from_host(x)

    assert x.ndim == 5
    x_axes = self.input_axes
    assert len(x_axes) == 5

    rval = self.conv3d_op(x, self._filters, b, (1, 1, 1))

    rval_axes = self.output_axes
    assert len(rval_axes) == 5

    return rval
def __init__(self,
             num_channels,
             num_pieces,
             kernel_shape,
             pool_shape,
             pool_stride,
             pool_temporal_shape,
             pool_temporal_stride,
             layer_name,
             irange=None,
             init_bias=0.,
             W_lr_scale=None,
             b_lr_scale=None,
             pad=0,
             fix_pool_shape=False,
             fix_pool_stride=False,
             fix_kernel_shape=False,
             partial_sum=1,
             tied_b=False,
             max_kernel_norm=None,
             input_normalization=None,
             detector_normalization=None,
             min_zero=False,
             output_normalization=None,
             kernel_stride=(1, 1, 1)):
    """
    layer_name: A name for this layer that will be prepended to
        monitoring channels related to this layer.
    num_channels: The number of output channels the layer should have.
        Note that it must internally compute num_channels * num_pieces
        convolution channels.
    num_pieces: The number of linear pieces used to make each maxout
        unit.
    kernel_shape: The shape of the convolution kernel.
    pool_shape: The shape of the spatial max pooling. A three-tuple of
        ints.
    pool_stride: The stride of the spatial and temporal max pooling.
    irange: if specified, initializes each weight randomly in
        U(-irange, irange)
    init_bias: All biases are initialized to this number
    W_lr_scale: The learning rate on the weights for this layer is
        multiplied by this scaling factor
    b_lr_scale: The learning rate on the biases for this layer is
        multiplied by this scaling factor
    pad: The amount of zero-padding to implicitly add to the boundary of
        the image when computing the convolution. Useful for making sure
        pixels at the edge still get to influence multiple hidden units.
    fix_pool_shape: If True, will modify self.pool_shape to avoid having
        pool shape bigger than the entire detector layer. If you have
        this on, you should probably also have fix_pool_stride on, since
        the pool shape might shrink smaller than the stride, even if the
        stride was initially valid. The "fix" parameters are useful for
        working with a hyperparameter optimization package, which might
        often propose sets of hyperparameters that are not feasible, but
        can easily be projected back into the feasible set.
    fix_kernel_shape: if True, will modify self.kernel_shape to avoid
        having the kernel shape bigger than the implicitly zero padded
        input layer
    partial_sum: a parameter that controls whether to prefer runtime
        savings or memory savings when computing the gradient with
        respect to the kernels. See
        pylearn2.sandbox.cuda_convnet.weight_acts.py for details. The
        default is to prefer high speed. Note that changing this setting
        may change the value of computed results slightly due to
        different rounding error.
    tied_b: If true, all biases in the same channel are constrained to
        be the same as each other. Otherwise, each bias at each location
        is learned independently.
    max_kernel_norm: If specified, each kernel is constrained to have at
        most this norm.
    input_normalization, detector_normalization, output_normalization:
        if specified, should be a callable object. The state of the
        network is optionally replaced with normalization(state) at each
        of the 3 points in processing:
            input: the input the layer receives can be normalized right
                away
            detector: the maxout units can be normalized prior to the
                spatial pooling
            output: the output of the layer, after spatial pooling, can
                be normalized as well
    kernel_stride: vertical, horizontal and time pixel stride between
        each detector.
    """
    check_cuda(str(type(self)))

    detector_channels = num_channels * num_pieces

    self.__dict__.update(locals())
    del self.self
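# -- Illustration (not pylearn2 code) ------------------------------------
# A maxout layer computes num_channels * num_pieces convolution channels
# and keeps, for each output channel, the elementwise maximum over its
# pieces. With the interleaved layout used by the fprop methods below
# (piece i of every channel at z[i::num_pieces]), the reduction looks
# like this in numpy:

def _demo_maxout_pieces():
    import numpy as np
    num_channels, num_pieces = 4, 3
    z = np.random.randn(num_channels * num_pieces, 8, 8)  # ('c', 0, 1)
    s = z[0::num_pieces]
    for i in range(1, num_pieces):
        s = np.maximum(s, z[i::num_pieces])
    assert s.shape == (num_channels, 8, 8)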
def fprop(self, state_below):
    check_cuda(str(type(self)))
    self.input_space.validate(state_below)

    if not hasattr(self, 'input_normalization'):
        self.input_normalization = None

    if self.input_normalization:
        state_below = self.input_normalization(state_below)

    # fft 3d convolution
    z = self.transformer.lmul(state_below)

    # bias addition
    if not hasattr(self, 'tied_b'):
        self.tied_b = False
    if self.tied_b:
        b = self.b.dimshuffle(0, 'x', 'x', 'x', 'x')
    else:
        b = self.b.dimshuffle('x', 0, 1, 2, 3)
    z = z + b

    if self.layer_name is not None:
        z.name = self.layer_name + '_z'
    self.detector_space.validate(z)

    # ReLUs
    z = T.maximum(z, 0)

    if self.output_space.num_channels % 16 != 0:
        raise NotImplementedError(
            "num channels should always be divisible by 16")

    # alex's max pool op only works when the number of channels
    # is divisible by 16. we can only do the cross-channel pooling
    # first if the cross-channel pooling preserves that property

    # Pooling
    # permute axes ['b', 0, 1, 't', 'c'] -> ['c', 0, 1, 't', 'b']
    # (axes required for pooling)
    z = z.dimshuffle(4, 1, 2, 3, 0)

    # spatial pooling x/y: fold 't' into the batch axis so the 2D
    # pooling op sees a 4D tensor
    z_shape = z.shape
    z = z.reshape(
        (z_shape[0], z_shape[1], z_shape[2], z_shape[3] * z_shape[4]))
    p = max_pool_c01b(c01b=z,
                      pool_shape=self.pool_shape,
                      pool_stride=self.pool_stride)
    p_shape = p.shape
    p = p.reshape(
        (p_shape[0], p_shape[1], p_shape[2], z_shape[3], z_shape[4]))

    # temporal pooling with overlap (t): fold the spatial axes together
    # ['c', 0, 1, 't', 'b'] -> ['c', 0*1, 't', 'b'], which is
    # ('c', 0, 1, 'b') as far as max_pool_c01b is concerned
    p_shape = p.shape
    p = p.reshape(
        (p_shape[0], p_shape[1] * p_shape[2], p_shape[3], p_shape[4]))
    t = temporal_max_pool_c01b(c01b=p,
                               pool_shape=self.pool_temporal_shape,
                               pool_stride=self.pool_temporal_stride,
                               image_shape=self.temp_pool_input_shape)
    t_shape = t.shape
    t = t.reshape(
        (t_shape[0], p_shape[1], p_shape[2], t_shape[2], t_shape[3]))

    # Permute back axes ['c', 0, 1, 't', 'b'] -> ['b', 0, 1, 't', 'c']
    t = t.dimshuffle(4, 1, 2, 3, 0)

    self.output_space.validate(t)

    if not hasattr(self, 'output_normalization'):
        self.output_normalization = None

    if self.output_normalization:
        t = self.output_normalization(t)

    return t
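# -- Illustration (not pylearn2 code) ------------------------------------
# Both pooling stages above reuse a 4D pooling op on a 5D tensor by
# folding the axis that is not being pooled into a neighbouring axis,
# pooling, and unfolding. The reshape bookkeeping in numpy (pooling
# itself omitted):

def _demo_fold_unfold():
    import numpy as np
    z = np.random.randn(16, 8, 8, 5, 2)          # ('c', 0, 1, 't', 'b')
    c, h, w, t, b = z.shape
    folded = z.reshape(c, h, w, t * b)           # 4D view for spatial pooling
    # ... 2D spatial pooling over axes (1, 2) would go here ...
    unfolded = folded.reshape(c, h, w, t, b)
    assert np.array_equal(unfolded, z)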
def piece_prop(self, state_below):
    """
    Note: this only reports pieces in terms of which channel wins, not
    which spatial location wins. Depending on the input size, it may
    report a piece map for either the pre-spatial pooling or the
    post-spatial pooling tensor.
    """
    check_cuda(str(type(self)))
    self.input_space.validate(state_below)

    if not hasattr(self, 'input_normalization'):
        self.input_normalization = None

    if self.input_normalization:
        state_below = self.input_normalization(state_below)

    # Alex's code requires # input channels to be <= 3 or a multiple of 4
    # so we add dummy channels if necessary
    if not hasattr(self, 'dummy_channels'):
        self.dummy_channels = 0
    if self.dummy_channels > 0:
        state_below = T.concatenate(
            (state_below,
             T.zeros_like(state_below[0:self.dummy_channels, :, :, :])),
            axis=0)

    z = self.transformer.lmul(state_below)
    if not hasattr(self, 'tied_b'):
        self.tied_b = False
    if self.tied_b:
        b = self.b.dimshuffle(0, 'x', 'x', 'x')
    else:
        b = self.b.dimshuffle(0, 1, 2, 'x')

    z = z + b
    if self.layer_name is not None:
        z.name = self.layer_name + '_z'

    self.detector_space.validate(z)

    assert self.detector_space.num_channels % 16 == 0

    if self.output_space.num_channels % 16 == 0:
        # alex's max pool op only works when the number of channels
        # is divisible by 16. we can only do the cross-channel pooling
        # first if the cross-channel pooling preserves that property
        piece = None
        if self.num_pieces != 1:
            s = None
            for i in xrange(self.num_pieces):
                t = z[i::self.num_pieces, :, :, :]
                if s is None:
                    s = t
                    piece = T.zeros_like(t)
                else:
                    s = T.maximum(s, t)
                    mask = T.eq(s, t)
                    piece = mask * i + (1 - mask) * piece
            z = s

        if self.detector_normalization:
            z = self.detector_normalization(z)

        p = max_pool_c01b(c01b=z,
                          pool_shape=self.pool_shape,
                          pool_stride=self.pool_stride,
                          image_shape=self.detector_space.shape)
    else:
        if self.detector_normalization is not None:
            raise NotImplementedError(
                "We can't normalize the detector "
                "layer because the detector layer never exists as a "
                "stage of processing in this implementation.")
        z = max_pool_c01b(c01b=z,
                          pool_shape=self.pool_shape,
                          pool_stride=self.pool_stride,
                          image_shape=self.detector_space.shape)
        if self.num_pieces != 1:
            s = None
            piece = None
            for i in xrange(self.num_pieces):
                t = z[i::self.num_pieces, :, :, :]
                if s is None:
                    s = t
                    piece = T.zeros_like(t)
                else:
                    s = T.maximum(s, t)
                    mask = T.eq(s, t)
                    piece = mask * i + (1 - mask) * piece
            z = s
        p = z

    self.output_space.validate(p)

    if hasattr(self, 'min_zero') and self.min_zero:
        p = p * (p > 0.)

    if not hasattr(self, 'output_normalization'):
        self.output_normalization = None

    if self.output_normalization:
        p = self.output_normalization(p)

    return p, piece
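# -- Illustration (not pylearn2 code) ------------------------------------
# The mask update above is a running argmax: after each piece, `piece`
# holds the index of the piece that currently attains the maximum. In
# numpy, ignoring ties:

def _demo_piece_map():
    import numpy as np
    num_channels, num_pieces = 2, 3
    z = np.random.randn(num_channels * num_pieces, 4)
    s, piece = None, None
    for i in range(num_pieces):
        t = z[i::num_pieces]
        if s is None:
            s, piece = t, np.zeros_like(t)
        else:
            s = np.maximum(s, t)
            mask = (s == t)
            piece = mask * i + (1 - mask) * piece
    expected = np.argmax(z.reshape(num_channels, num_pieces, 4), axis=1)
    assert np.array_equal(piece, expected)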
def setup_detector_layer_btc01(layer, input_space, rng, irange):
    # Use "self" to refer to layer from now on, so we can pretend we're
    # just running in the set_input_space method of the layer
    self = layer

    # Make sure cuda is available
    check_cuda(str(type(self)))

    # Validate input
    if not isinstance(input_space, Conv3DSpace):
        raise TypeError("The input to a convolutional layer should be a "
                        "Conv3DSpace, but layer " + self.layer_name +
                        " got " + str(type(self.input_space)))

    if not hasattr(self, 'detector_channels'):
        raise ValueError('layer argument must have a "detector_channels" '
                         'attribute specifying how many channels to put in '
                         'the convolution kernel stack.')

    # Store the input space
    self.input_space = input_space

    output_shape = [int((i_sh + 2. * p_sh - k_sh) / float(k_st)) + 1
                    for i_sh, p_sh, k_sh, k_st in zip(self.input_space.shape,
                                                      self.pad,
                                                      self.kernel_shape,
                                                      self.kernel_stride)]

    def handle_kernel_shape(idx):
        if self.kernel_shape[idx] < 1:
            raise ValueError("kernel must have strictly positive size on "
                             "all axes but has shape: " +
                             str(self.kernel_shape))
        if output_shape[idx] <= 0:
            if self.fix_kernel_shape:
                # self.pad is per-axis here (it is zipped above)
                self.kernel_shape[idx] = \
                    self.input_space.shape[idx] + 2 * self.pad[idx]
                assert self.kernel_shape[idx] != 0
                output_shape[idx] = 1
                warnings.warn("Had to change the kernel shape to make "
                              "network feasible")
            else:
                raise ValueError("kernel too big for input "
                                 "(even with zero padding)")

    map(handle_kernel_shape, [0, 1, 2])

    # space required for 3dconv
    self.detector_space = Conv3DSpace(shape=output_shape,
                                      num_channels=self.detector_channels,
                                      axes=('b', 'c', 0, 1, 't'))

    if hasattr(self, 'partial_sum'):
        partial_sum = self.partial_sum
    else:
        partial_sum = 1

    # filter shape required for fft3dconv ('c_detector', 'c', 't', '0', '1')
    filter_shape = (self.detector_space.num_channels,
                    self.input_space.num_channels,
                    self.kernel_shape[0],
                    self.kernel_shape[1],
                    self.kernel_shape[2])

    # signal shape required for fft-3dconv ('b', 'c', 't', '0', '1')
    signal_shape = (self.mlp.batch_size,
                    self.input_space.num_channels,
                    self.input_space.shape[0],
                    self.input_space.shape[1],
                    self.input_space.shape[2])

    self.transformer = make_random_conv3D(
        irange=self.irange,
        input_axes=('b', 'c', 0, 1, 't'),
        output_axes=self.detector_space.axes,
        signal_shape=signal_shape,
        filter_shape=filter_shape,
        pad=self.pad,
        partial_sum=partial_sum,
        kernel_stride=self.kernel_stride,
        rng=rng)

    W, = self.transformer.get_params()
    W.name = 'W'

    if self.tied_b:
        self.b = sharedX(np.zeros((self.detector_space.num_channels)) +
                         self.init_bias)
    else:
        self.b = sharedX(self.detector_space.get_origin() + self.init_bias)
    self.b.name = 'b'
def fprop(self, state_below):
    check_cuda(str(type(self)))
    self.input_space.validate(state_below)

    if not hasattr(self, 'input_normalization'):
        self.input_normalization = None

    if self.input_normalization:
        state_below = self.input_normalization(state_below)

    # GPU 3d correlation
    z = self.transformer.lmul(state_below)

    # bias addition
    if not hasattr(self, 'tied_b'):
        self.tied_b = False
    if self.tied_b:
        b = self.b.dimshuffle('x', 0, 'x', 'x', 'x')
    else:
        b = self.b.dimshuffle('x', 0, 1, 2, 3)
    z = z + b

    if self.layer_name is not None:
        z.name = self.layer_name + '_z'
    self.detector_space.validate(z)

    # ReLUs
    z = T.maximum(z, 0)

    # Pooling
    if tuple(self.pool_shape) != (1, 1, 1):
        # Pooling on y, t: fold the first two non-batch axes together so
        # dnn_pool (a 2D op) can pool over the remaining two
        z_shape = z.shape
        z = z.reshape(
            (z_shape[0], z_shape[1] * z_shape[2], z_shape[3], z_shape[4]))
        p = dnn_pool(img=z,
                     ws=tuple(self.pool_shape[1:]),
                     stride=tuple(self.pool_stride[1:]))
        p_shape = p.shape
        p = p.reshape(
            (p_shape[0], z_shape[1], z_shape[2], p_shape[2], p_shape[3]))

        # Pooling on x
        p_shape = p.shape
        p = p.reshape(
            (p_shape[0], p_shape[1], p_shape[2], p_shape[3] * p_shape[4]))
        t = dnn_pool(img=p,
                     ws=tuple([self.pool_shape[0], 1]),
                     stride=tuple([self.pool_stride[0], 1]))
        t_shape = t.shape
        t = t.reshape(
            (t_shape[0], t_shape[1], t_shape[2], p_shape[3], p_shape[4]))
    else:
        t = z

    self.output_space.validate(t)

    if not hasattr(self, 'output_normalization'):
        self.output_normalization = None

    if self.output_normalization:
        t = self.output_normalization(t)

    return t
def setup_detector_layer_bct01(layer, input_space, rng, irange):
    """
    Takes steps to set up an object for use as being some kind of
    convolutional layer. This function sets up only the detector layer.

    Parameters
    ----------
    layer: Any python object that allows the modifications described
        below and has the following attributes:
            pad: int describing amount of zero padding to add
            kernel_shape: 2-element tuple or list describing spatial
                shape of kernel
            fix_kernel_shape: bool, if true, will shrink the kernel
                shape to make it feasible, as needed (useful for
                hyperparameter searchers)
            detector_channels: The number of channels in the detector
                layer
            init_bias: A numeric constant added to a tensor of zeros to
                initialize the bias
            tied_b: If true, biases are shared across all spatial
                locations
    input_space: A Conv3DSpace to be used as input to the layer
    rng: a numpy RandomState or equivalent
    irange: float. kernel elements are initialized randomly from
        U(-irange, irange)

    Does the following:
        raises a RuntimeError if cuda is not available
        sets layer.input_space to input_space
        sets up addition of dummy channels for compatibility with
        cuda-convnet:
            layer.dummy_channels: # of dummy channels that need to be
                added (You might want to check this and raise an
                Exception if it's not 0)
            layer.dummy_space: The Conv2DSpace representing the input
                with dummy channels added
        sets layer.detector_space to the space for the detector layer
        sets layer.transformer to be a Conv3DBCT01 instance
        sets layer.b to the right value
    """
    # Use "self" to refer to layer from now on, so we can pretend we're
    # just running in the set_input_space method of the layer
    self = layer

    # Make sure cuda is available
    check_cuda(str(type(self)))

    # Validate input
    if not isinstance(input_space, Conv3DSpace):
        raise TypeError("The input to a convolutional layer should be a "
                        "Conv3DSpace, but layer " + self.layer_name +
                        " got " + str(type(self.input_space)))

    if not hasattr(self, 'detector_channels'):
        raise ValueError('layer argument must have a "detector_channels" '
                         'attribute specifying how many channels to put in '
                         'the convolution kernel stack.')

    # Store the input space
    self.input_space = input_space

    if hasattr(self, 'kernel_stride'):
        kernel_stride = self.kernel_stride
    else:
        kernel_stride = [1, 1]

    dummy_shape = [self.input_space.shape[0], self.input_space.shape[1]]
    output_shape = [int(np.ceil((i_sh + 2. * self.pad - k_sh) /
                                float(k_st))) + 1
                    for i_sh, k_sh, k_st in zip(dummy_shape,
                                                self.kernel_shape,
                                                kernel_stride)]

    output_sequence_length = \
        self.input_space.shape[2] - self.kernel_sequence_length + 1
    if output_sequence_length < 0:
        raise ValueError("Input sequence length ({}) should be at least "
                         "the kernel sequence length ({})".format(
                             self.input_space.sequence_length,
                             self.kernel_sequence_length))

    def handle_kernel_shape(idx):
        if self.kernel_shape[idx] < 1:
            raise ValueError("kernel must have strictly positive size on "
                             "all axes but has shape: " +
                             str(self.kernel_shape))
        if output_shape[idx] <= 0:
            if self.fix_kernel_shape:
                self.kernel_shape[idx] = \
                    self.input_space.shape[idx] + 2 * self.pad
                assert self.kernel_shape[idx] != 0
                output_shape[idx] = 1
                warnings.warn("Had to change the kernel shape to make "
                              "network feasible")
            else:
                raise ValueError("kernel too big for input "
                                 "(even with zero padding)")

    map(handle_kernel_shape, [0, 1])

    # space required for fft-3dconv
    output_shape = [output_shape[0], output_shape[1],
                    output_sequence_length]
    self.detector_space = Conv3DSpace(shape=output_shape,
                                      num_channels=self.detector_channels,
                                      axes=('b', 'c', 't', 0, 1))

    if hasattr(self, 'partial_sum'):
        partial_sum = self.partial_sum
    else:
        partial_sum = 1

    # filter shape required for fft-3dconv ('c_detector', 'c', 't', '0', '1')
    filter_shape = (self.detector_space.num_channels,
                    self.input_space.num_channels,
                    self.kernel_sequence_length,
                    self.kernel_shape[0],
                    self.kernel_shape[1])

    # signal shape required for fft-3dconv ('b', 'c', 't', '0', '1')
    signal_shape = (self.mlp.batch_size,
                    self.input_space.num_channels,
                    self.input_space.sequence_length,
                    self.input_space.shape[0],
                    self.input_space.shape[1])

    self.transformer = make_random_conv3D(
        irange=self.irange,
        input_axes=('b', 'c', 't', 0, 1),
        output_axes=self.detector_space.axes,
        signal_shape=signal_shape,
        filter_shape=filter_shape,
        pad=self.pad,
        partial_sum=partial_sum,
        kernel_stride=kernel_stride,
        rng=rng)

    W, = self.transformer.get_params()
    W.name = 'W'

    if self.tied_b:
        self.b = sharedX(np.zeros((self.detector_space.num_channels)) +
                         self.init_bias)
    else:
        self.b = sharedX(self.detector_space.get_origin() + self.init_bias)
    self.b.name = 'b'
def __init__(self,
             num_channels,
             num_pieces,
             kernel_shape,
             pool_shape,
             pool_stride,
             layer_name,
             irange=None,
             init_bias=0.0,
             W_lr_scale=None,
             b_lr_scale=None,
             pad=0,
             fix_pool_shape=False,
             fix_pool_stride=False,
             fix_kernel_shape=False,
             partial_sum=1,
             tied_b=False,
             max_kernel_norm=None,
             input_normalization=None,
             detector_normalization=None,
             min_zero=False,
             output_normalization=None,
             kernel_stride=(1, 1)):
    """
    num_channels: The number of output channels the layer should have.
        Note that it must internally compute num_channels * num_pieces
        convolution channels.
    num_pieces: The number of linear pieces used to make each maxout
        unit.
    kernel_shape: The shape of the convolution kernel.
    pool_shape: The shape of the spatial max pooling. A two-tuple of
        ints. This is redundant as cuda-convnet requires the pool shape
        to be square.
    pool_stride: The stride of the spatial max pooling. Also must be
        square.
    layer_name: A name for this layer that will be prepended to
        monitoring channels related to this layer.
    irange: if specified, initializes each weight randomly in
        U(-irange, irange)
    init_bias: All biases are initialized to this number
    W_lr_scale: The learning rate on the weights for this layer is
        multiplied by this scaling factor
    b_lr_scale: The learning rate on the biases for this layer is
        multiplied by this scaling factor
    pad: The amount of zero-padding to implicitly add to the boundary of
        the image when computing the convolution. Useful for making sure
        pixels at the edge still get to influence multiple hidden units.
    fix_pool_shape: If True, will modify self.pool_shape to avoid having
        pool shape bigger than the entire detector layer. If you have
        this on, you should probably also have fix_pool_stride on, since
        the pool shape might shrink smaller than the stride, even if the
        stride was initially valid. The "fix" parameters are useful for
        working with a hyperparameter optimization package, which might
        often propose sets of hyperparameters that are not feasible, but
        can easily be projected back into the feasible set.
    fix_kernel_shape: if True, will modify self.kernel_shape to avoid
        having the kernel shape bigger than the implicitly zero padded
        input layer
    partial_sum: a parameter that controls whether to prefer runtime
        savings or memory savings when computing the gradient with
        respect to the kernels. See
        pylearn2.sandbox.cuda_convnet.weight_acts.py for details. The
        default is to prefer high speed. Note that changing this setting
        may change the value of computed results slightly due to
        different rounding error.
    tied_b: If true, all biases in the same channel are constrained to
        be the same as each other. Otherwise, each bias at each location
        is learned independently.
    max_kernel_norm: If specified, each kernel is constrained to have at
        most this norm.
    input_normalization, detector_normalization, output_normalization:
        if specified, should be a callable object. The state of the
        network is optionally replaced with normalization(state) at each
        of the 3 points in processing:
            input: the input the layer receives can be normalized right
                away
            detector: the maxout units can be normalized prior to the
                spatial pooling
            output: the output of the layer, after spatial pooling, can
                be normalized as well
    kernel_stride: vertical and horizontal pixel stride between each
        detector.
    """
    check_cuda(str(type(self)))

    detector_channels = num_channels * num_pieces

    self.__dict__.update(locals())
    del self.self
def fprop(self, state_below):
    check_cuda(str(type(self)))
    self.input_space.validate(state_below)

    if not hasattr(self, 'input_normalization'):
        self.input_normalization = None

    if self.input_normalization:
        state_below = self.input_normalization(state_below)

    # Alex's code requires # input channels to be <= 3 or a multiple of 4
    # so we add dummy channels if necessary
    if not hasattr(self, 'dummy_channels'):
        self.dummy_channels = 0
    if self.dummy_channels > 0:
        state_below = T.concatenate(
            (state_below,
             T.zeros_like(state_below[0:self.dummy_channels, :, :, :, :])),
            axis=0)

    z = self.transformer.lmul(state_below)
    if not hasattr(self, 'tied_b'):
        self.tied_b = False
    if self.tied_b:
        b = self.b.dimshuffle(0, 'x', 'x', 'x', 'x')
    else:
        b = self.b.dimshuffle(0, 1, 2, 'x', 'x')

    z = z + b
    if self.layer_name is not None:
        z.name = self.layer_name + '_z'

    self.detector_space.validate(z)

    assert self.detector_space.num_channels % 16 == 0

    if self.output_space.num_channels % 16 == 0:
        # alex's max pool op only works when the number of channels
        # is divisible by 16. we can only do the cross-channel pooling
        # first if the cross-channel pooling preserves that property
        if self.num_pieces != 1:
            s = None
            for i in xrange(self.num_pieces):
                t = z[i::self.num_pieces, :, :, :]
                if s is None:
                    s = t
                else:
                    s = T.maximum(s, t)
            z = s

        # pool across sequences
        if self.sequence_pool_shape != 1:
            s = None
            for i in xrange(self.sequence_pool_shape):
                t = z[:, :, :, i::self.sequence_pool_shape, :]
                if s is None:
                    s = t
                else:
                    s = T.maximum(s, t)
            z = s

        if self.detector_normalization:
            z = self.detector_normalization(z)

        # spatial pooling: fold the last two axes together so the 2D
        # pooling op can be used
        z_shape = z.shape
        z = z.reshape(
            (z_shape[0], z_shape[1], z_shape[2], z_shape[3] * z_shape[4]))
        p = max_pool_c01b(c01b=z,
                          pool_shape=self.pool_shape,
                          pool_stride=self.pool_stride,
                          image_shape=self.detector_space.shape)
        p_shape = p.shape
        p = p.reshape(
            (p_shape[0], p_shape[1], p_shape[2], z_shape[3], z_shape[4]))
    else:
        raise NotImplementedError(
            "num channels should always be divisible by 16")

    self.output_space.validate(p)

    if hasattr(self, 'min_zero') and self.min_zero:
        p = p * (p > 0.)

    if not hasattr(self, 'output_normalization'):
        self.output_normalization = None

    if self.output_normalization:
        p = self.output_normalization(p)

    return p
def setup_detector_layer_b01tc(layer, input_space, rng, irange, stride):
    """
    Takes steps to set up an object for use as being some kind of
    convolutional layer. This function sets up only the detector layer.

    Parameters
    ----------
    layer: Any python object that allows the modifications described
        below and has the following attributes:
            pad: int describing amount of zero padding to add
            kernel_shape: 3-element tuple or list describing shape of
                kernel
            fix_kernel_shape: bool, if true, will shrink the kernel
                shape to make it feasible, as needed (useful for
                hyperparameter searchers)
            detector_channels: The number of channels in the detector
                layer
            init_bias: A numeric constant added to a tensor of zeros to
                initialize the bias
            tied_b: If true, biases are shared across all spatial
                locations
    input_space: A Conv3DSpace to be used as input to the layer
    rng: a numpy RandomState or equivalent
    irange: float. kernel elements are initialized randomly from
        U(-irange, irange)

    Does the following:
        raises a RuntimeError if cuda is not available
        sets layer.input_space to input_space
        sets up addition of dummy channels for compatibility with
        cuda-convnet:
            layer.dummy_channels: # of dummy channels that need to be
                added (You might want to check this and raise an
                Exception if it's not 0)
            layer.dummy_space: The Conv2DSpace representing the input
                with dummy channels added
        sets layer.detector_space to the space for the detector layer
        sets layer.transformer to be a Conv3DB01TC instance
        sets layer.b to the right value
    """
    # Use "self" to refer to layer from now on, so we can pretend we're
    # just running in the set_input_space method of the layer
    self = layer

    # Make sure cuda is available
    check_cuda(str(type(self)))

    # Validate input
    if not isinstance(input_space, Conv3DSpace):
        raise TypeError("The input to a convolutional layer should be a "
                        "Conv3DSpace, but layer " + self.layer_name +
                        " got " + str(type(self.input_space)))

    if not hasattr(self, 'detector_channels'):
        raise ValueError('layer argument must have a "detector_channels" '
                         'attribute specifying how many channels to put in '
                         'the convolution kernel stack.')

    # Store the input space
    self.input_space = input_space

    # The stride argument always wins (the original code assigned it in
    # both branches of a hasattr check).
    kernel_stride = stride

    output_shape = [int((i_sh + 2. * self.pad - k_sh) / float(k_st)) + 1
                    for i_sh, k_sh, k_st in zip(self.input_space.shape,
                                                self.kernel_shape,
                                                kernel_stride)]

    def handle_kernel_shape(idx):
        if self.kernel_shape[idx] < 1:
            raise ValueError("kernel must have strictly positive size on "
                             "all axes but has shape: " +
                             str(self.kernel_shape))
        if output_shape[idx] <= 0:
            if self.fix_kernel_shape:
                self.kernel_shape[idx] = \
                    self.input_space.shape[idx] + 2 * self.pad
                assert self.kernel_shape[idx] != 0
                output_shape[idx] = 1
                warnings.warn("Had to change the kernel shape to make "
                              "network feasible")
            else:
                raise ValueError("kernel too big for input "
                                 "(even with zero padding)")

    map(handle_kernel_shape, [0, 1, 2])

    # space required for 3dconv
    self.detector_space = Conv3DSpace(shape=output_shape,
                                      num_channels=self.detector_channels,
                                      axes=('b', 0, 1, 't', 'c'))

    if hasattr(self, 'partial_sum'):
        partial_sum = self.partial_sum
    else:
        partial_sum = 1

    # filter shape required for 3dconv ('c_detector', 0, 1, 't', 'c')
    filter_shape = (self.detector_space.num_channels,
                    self.kernel_shape[0],
                    self.kernel_shape[1],
                    self.kernel_shape[2],
                    self.input_space.num_channels)

    # signal shape required for 3dconv ('b', 0, 1, 't', 'c')
    signal_shape = (self.mlp.batch_size,
                    self.input_space.shape[0],
                    self.input_space.shape[1],
                    self.input_space.shape[2],
                    self.input_space.num_channels)

    self.transformer = make_random_conv3D(
        irange=self.irange,
        input_axes=('b', 0, 1, 't', 'c'),
        output_axes=self.detector_space.axes,
        signal_shape=signal_shape,
        filter_shape=filter_shape,
        pad=self.pad,
        partial_sum=partial_sum,
        kernel_stride=kernel_stride,
        rng=rng)

    W, = self.transformer.get_params()
    W.name = 'W'

    if self.tied_b:
        self.b = sharedX(
            np.zeros((self.detector_space.num_channels)) + self.init_bias)
    else:
        self.b = sharedX(self.detector_space.get_origin() + self.init_bias)
    self.b.name = 'b'
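# -- Illustration (not pylearn2 code) ------------------------------------
# This setup function uses the floor variant of the output-size formula,
#     out = (in + 2 * pad - kernel) // stride + 1
# (the c01b setup below uses a ceil variant instead):

def _demo_floor_output_shape():
    assert (32 + 2 * 0 - 5) // 1 + 1 == 28
    assert (32 + 2 * 2 - 5) // 2 + 1 == 16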
def setup_detector_layer_c01b(layer, input_space, rng, irange="not specified"):
    """
    .. todo::

        WRITEME properly

    Takes steps to set up an object for use as being some kind of
    convolutional layer. This function sets up only the detector layer.

    Does the following:

    * raises a RuntimeError if cuda is not available
    * sets layer.input_space to input_space
    * sets up addition of dummy channels for compatibility with
      cuda-convnet:

      - layer.dummy_channels: # of dummy channels that need to be added
        (You might want to check this and raise an Exception if it's not 0)
      - layer.dummy_space: The Conv2DSpace representing the input with
        dummy channels added

    * sets layer.detector_space to the space for the detector layer
    * sets layer.transformer to be a Conv2D instance
    * sets layer.b to the right value

    Parameters
    ----------
    layer : object
        Any python object that allows the modifications described below
        and has the following attributes:

        * pad : int describing amount of zero padding to add
        * kernel_shape : 2-element tuple or list describing spatial shape
          of kernel
        * fix_kernel_shape : bool, if true, will shrink the kernel shape
          to make it feasible, as needed (useful for hyperparameter
          searchers)
        * detector_channels : The number of channels in the detector layer
        * init_bias : numeric constant added to a tensor of zeros to
          initialize the bias
        * tied_b : If true, biases are shared across all spatial locations
    input_space : WRITEME
        A Conv2DSpace to be used as input to the layer
    rng : WRITEME
        A numpy RandomState or equivalent
    """
    if irange != "not specified":
        raise AssertionError(
            "There was a bug in setup_detector_layer_c01b. "
            "It uses layer.irange instead of the irange parameter to the "
            "function. The irange parameter is now disabled by this "
            "AssertionError, so that this error message can alert you that "
            "the bug affected your code and explain why the interface is "
            "changing. The irange parameter to the function and this "
            "error message may be removed after April 21, 2014."
        )

    # Use "self" to refer to layer from now on, so we can pretend we're
    # just running in the set_input_space method of the layer
    self = layer

    # Make sure cuda is available
    check_cuda(str(type(self)))

    # Validate input
    if not isinstance(input_space, Conv2DSpace):
        raise TypeError("The input to a convolutional layer should be a "
                        "Conv2DSpace, but layer " + self.layer_name +
                        " got " + str(type(self.input_space)))

    if not hasattr(self, "detector_channels"):
        raise ValueError("layer argument must have a 'detector_channels' "
                         "attribute specifying how many channels to put in "
                         "the convolution kernel stack.")

    # Store the input space
    self.input_space = input_space

    # Make sure number of channels is supported by cuda-convnet
    # (multiple of 4 or <= 3)
    # If not supported, pad the input with dummy channels
    ch = self.input_space.num_channels
    rem = ch % 4
    if ch > 3 and rem != 0:
        self.dummy_channels = 4 - rem
    else:
        self.dummy_channels = 0
    self.dummy_space = Conv2DSpace(
        shape=input_space.shape,
        channels=input_space.num_channels + self.dummy_channels,
        axes=("c", 0, 1, "b")
    )

    if hasattr(self, "kernel_stride"):
        kernel_stride = self.kernel_stride
    else:
        kernel_stride = [1, 1]

    output_shape = [int(np.ceil((i_sh + 2. * self.pad - k_sh) /
                                float(k_st))) + 1
                    for i_sh, k_sh, k_st in zip(self.input_space.shape,
                                                self.kernel_shape,
                                                kernel_stride)]

    def handle_kernel_shape(idx):
        if self.kernel_shape[idx] < 1:
            raise ValueError("kernel must have strictly positive size on "
                             "all axes but has shape: " +
                             str(self.kernel_shape))
        if output_shape[idx] <= 0:
            if self.fix_kernel_shape:
                self.kernel_shape[idx] = \
                    self.input_space.shape[idx] + 2 * self.pad
                assert self.kernel_shape[idx] != 0
                output_shape[idx] = 1
                warnings.warn("Had to change the kernel shape to make "
                              "network feasible")
            else:
                raise ValueError("kernel too big for input "
                                 "(even with zero padding)")

    map(handle_kernel_shape, [0, 1])

    if self.detector_channels < 16:
        raise ValueError("Cuda-convnet requires the detector layer to have "
                         "at least 16 channels.")

    self.detector_space = Conv2DSpace(shape=output_shape,
                                      num_channels=self.detector_channels,
                                      axes=("c", 0, 1, "b"))

    if hasattr(self, "partial_sum"):
        partial_sum = self.partial_sum
    else:
        partial_sum = 1

    if hasattr(self, "sparse_init") and self.sparse_init is not None:
        self.transformer = checked_call(
            make_sparse_random_conv2D,
            OrderedDict([("num_nonzero", self.sparse_init),
                         ("input_space", self.input_space),
                         ("output_space", self.detector_space),
                         ("kernel_shape", self.kernel_shape),
                         ("pad", self.pad),
                         ("partial_sum", partial_sum),
                         ("kernel_stride", kernel_stride),
                         ("rng", rng)]))
    else:
        self.transformer = make_random_conv2D(
            irange=self.irange,
            input_axes=self.input_space.axes,
            output_axes=self.detector_space.axes,
            input_channels=self.dummy_space.num_channels,
            output_channels=self.detector_space.num_channels,
            kernel_shape=self.kernel_shape,
            pad=self.pad,
            partial_sum=partial_sum,
            kernel_stride=kernel_stride,
            rng=rng)

    W, = self.transformer.get_params()
    W.name = self.layer_name + "_W"

    if self.tied_b:
        self.b = sharedX(np.zeros(self.detector_space.num_channels) +
                         self.init_bias)
    else:
        self.b = sharedX(self.detector_space.get_origin() + self.init_bias)
    self.b.name = self.layer_name + "_b"

    logger.info("Input shape: {0}".format(self.input_space.shape))
    logger.info("Detector space: {0}".format(self.detector_space.shape))
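# -- Illustration (not pylearn2 code) ------------------------------------
# Output size per spatial axis, as computed above:
#     out = ceil((in + 2*pad - kernel) / stride) + 1

def _demo_ceil_output_shape():
    import numpy as np

    def out_size(i, k, pad, stride):
        return int(np.ceil((i + 2. * pad - k) / float(stride))) + 1

    assert out_size(32, 5, pad=0, stride=1) == 28
    assert out_size(32, 5, pad=2, stride=2) == 17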
def fprop(self, state_below):
    check_cuda(str(type(self)))
    self.input_space.validate(state_below)

    if not hasattr(self, "input_normalization"):
        self.input_normalization = None

    if self.input_normalization:
        state_below = self.input_normalization(state_below)

    # Alex's code requires # input channels to be <= 3 or a multiple of 4
    # so we add dummy channels if necessary
    if not hasattr(self, "dummy_channels"):
        self.dummy_channels = 0
    if self.dummy_channels > 0:
        state_below = T.concatenate(
            (state_below,
             T.zeros_like(state_below[0:self.dummy_channels, :, :, :])),
            axis=0)

    z = self.transformer.lmul(state_below)
    if not hasattr(self, "tied_b"):
        self.tied_b = False
    if self.tied_b:
        b = self.b.dimshuffle(0, "x", "x", "x")
    else:
        b = self.b.dimshuffle(0, 1, 2, "x")

    z = z + b
    if self.layer_name is not None:
        z.name = self.layer_name + "_z"

    self.detector_space.validate(z)

    assert self.detector_space.num_channels % 16 == 0

    if self.output_space.num_channels % 16 == 0:
        # alex's max pool op only works when the number of channels
        # is divisible by 16. we can only do the cross-channel pooling
        # first if the cross-channel pooling preserves that property
        if self.num_pieces != 1:
            s = None
            for i in xrange(self.num_pieces):
                t = z[i::self.num_pieces, :, :, :]
                if s is None:
                    s = t
                else:
                    s = T.maximum(s, t)
            z = s

        if self.detector_normalization:
            z = self.detector_normalization(z)

        p = max_pool_c01b(c01b=z,
                          pool_shape=self.pool_shape,
                          pool_stride=self.pool_stride,
                          image_shape=self.detector_space.shape)
    else:
        if self.detector_normalization is not None:
            raise NotImplementedError(
                "We can't normalize the detector "
                "layer because the detector layer never exists as a "
                "stage of processing in this implementation.")
        z = max_pool_c01b(c01b=z,
                          pool_shape=self.pool_shape,
                          pool_stride=self.pool_stride,
                          image_shape=self.detector_space.shape)
        if self.num_pieces != 1:
            s = None
            for i in xrange(self.num_pieces):
                t = z[i::self.num_pieces, :, :, :]
                if s is None:
                    s = t
                else:
                    s = T.maximum(s, t)
            z = s
        p = z

    self.output_space.validate(p)

    if hasattr(self, "min_zero") and self.min_zero:
        p = p * (p > 0.0)

    if not hasattr(self, "output_normalization"):
        self.output_normalization = None

    if self.output_normalization:
        p = self.output_normalization(p)

    return p
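# -- Illustration (not pylearn2 code) ------------------------------------
# The dummy-channel logic above enforces cuda-convnet's constraint that
# the input have <= 3 channels or a multiple of 4: it pads with zero
# channels up to the next multiple of 4.

def _demo_dummy_channels():
    def n_dummy(ch):
        rem = ch % 4
        return 4 - rem if (ch > 3 and rem != 0) else 0

    assert [n_dummy(c) for c in (1, 3, 4, 5, 16, 18)] == [0, 0, 0, 3, 0, 2]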
def fprop(self, state_below):
    check_cuda(str(type(self)))
    self.input_space.validate(state_below)

    if not hasattr(self, 'input_normalization'):
        self.input_normalization = None

    if self.input_normalization:
        state_below = self.input_normalization(state_below)

    # fft 3d convolution
    z = self.transformer.lmul(state_below)

    # bias addition
    if not hasattr(self, 'tied_b'):
        self.tied_b = False
    if self.tied_b:
        b = self.b.dimshuffle(0, 'x', 'x', 'x', 'x')
    else:
        b = self.b.dimshuffle('x', 0, 1, 2, 3)
    z = z + b

    if self.layer_name is not None:
        z.name = self.layer_name + '_z'
    self.detector_space.validate(z)

    # ReLUs
    z = T.maximum(z, 0)

    if self.output_space.num_channels % 16 != 0:
        raise NotImplementedError(
            "num channels should always be divisible by 16")

    # alex's max pool op only works when the number of channels
    # is divisible by 16. we can only do the cross-channel pooling
    # first if the cross-channel pooling preserves that property

    # Pooling
    # permute axes ['b', 0, 1, 't', 'c'] -> ['c', 0, 1, 't', 'b']
    # (axes required for pooling)
    z = z.dimshuffle(4, 1, 2, 3, 0)

    # spatial pooling x/y
    z_shape = z.shape
    z = z.reshape(
        (z_shape[0], z_shape[1], z_shape[2], z_shape[3] * z_shape[4]))
    p = max_pool_c01b(c01b=z,
                      pool_shape=self.pool_shape[0:2],
                      pool_stride=self.pool_stride[0:2])
    p = p.reshape(
        (p.shape[0], p.shape[1], p.shape[2], z_shape[3], z_shape[4]))

    # temporal pooling with overlap (t)
    p_shape = p.shape
    # ['c', 0, 1, 't', 'b'] -> ['c', 0*1, 't', 'b']
    # (i.e. ('c', 0, 1, 'b') for max_pool_c01b)
    p = p.reshape(
        (p_shape[0], p_shape[1] * p_shape[2], p_shape[3], p_shape[4]))
    t = temporal_max_pool_c01b(c01b=p,
                               pool_shape=[1, self.pool_shape[2]],
                               pool_stride=[1, self.pool_stride[2]],
                               image_shape=self.temp_pool_input_shape)
    t_shape = t.shape
    t = t.reshape(
        (t_shape[0], p_shape[1], p_shape[2], t_shape[2], t_shape[3]))

    # Permute back axes ['c', 0, 1, 't', 'b'] -> ['b', 0, 1, 't', 'c']
    t = t.dimshuffle(4, 1, 2, 3, 0)

    self.output_space.validate(t)

    if not hasattr(self, 'output_normalization'):
        self.output_normalization = None

    if self.output_normalization:
        t = self.output_normalization(t)

    return t