def unpooling(self, Y_4D, Z, X_4D):
    """Reverse a pooling operation by scattering pooled values back.

    Both 4D tensors are flattened with ``images2neibs`` using a
    neighbourhood of one full row (``(1, shape[3])``).  A zero matrix shaped
    like the flattened ``X_4D`` then receives the flattened ``Y_4D`` values
    at the column positions given by ``Z``.

    :param Y_4D: pooled values to put back.
    :param Z: per-row column indices into the flattened ``X_4D`` rows.
    :param X_4D: tensor whose shape the result takes.
    :return: tensor of shape ``X_4D.shape``; positions not addressed by
        ``Z`` are zero.
    """
    pooled_rows = images2neibs(Y_4D,
                               T.as_tensor_variable((1, Y_4D.shape[3])))
    full_rows = images2neibs(X_4D,
                             T.as_tensor_variable((1, X_4D.shape[3])))
    n_rows = full_rows.shape[0]
    # Column vector of row numbers so it broadcasts against Z.
    row_index = T.arange(n_rows).reshape((n_rows, 1))
    blank = T.zeros_like(full_rows)
    restored = T.set_subtensor(blank[row_index, Z], pooled_rows)
    return restored.reshape(X_4D.shape)
def pool_2d_i2n(input, ds=(2, 2), strides=None, pad=(0, 0),
                pool_function=T.max, mode='ignore_borders'):
    """Pool the last two axes of a 4D tensor using ``images2neibs``.

    :param input: symbolic tensor indexed as ``shape[0..3]``.
    :param ds: pooling window size for the last two axes.
    :param strides: step between windows; defaults to ``ds``.
    :param pad: padding added on both sides of the last two axes.  Padding
        is filled with ``-inf`` and is only allowed with ``T.max``.
    :param pool_function: reduction applied over each window (``axis=1`` of
        the neighbourhood matrix).
    :param mode: forwarded to ``images2neibs``.
    :return: tensor of shape ``(shape[0], shape[1], out_rows, out_cols)``.
    :raises RuntimeError: if a stride is larger than the matching window
        size.
    """
    if strides is None:
        strides = ds
    if strides[0] > ds[0] or strides[1] > ds[1]:
        # Convert to tuples first: with list arguments ``strides + ds`` is a
        # list and the %-formatting itself would raise a TypeError.
        raise RuntimeError(
            "strides should be smaller than or equal to ds,"
            " strides=(%d, %d) and ds=(%d, %d)" %
            (tuple(strides) + tuple(ds)))
    shape = input.shape
    # Compare as a tuple so that pad=[0, 0] is treated as "no padding".
    if tuple(pad) != (0, 0):
        assert pool_function is T.max
        pad_x = pad[0]
        pad_y = pad[1]
        # -inf padding can never win a max, so it does not affect results.
        a = T.alloc(-numpy.inf, shape[0], shape[1],
                    shape[2] + pad_x * 2, shape[3] + pad_y * 2)
        input = T.set_subtensor(
            a[:, :, pad_x:pad_x + shape[2], pad_y:pad_y + shape[3]], input)
        shape = input.shape

    neibs = images2neibs(input, ds, strides, mode=mode)
    pooled_neibs = pool_function(neibs, axis=1)

    out_rows = (shape[2] - ds[0]) // strides[0] + 1
    out_cols = (shape[3] - ds[1]) // strides[1] + 1
    return pooled_neibs.reshape((shape[0], shape[1], out_rows, out_cols))
def pool_2d_i2n(input, ds=(2, 2), strides=None, pad=(0, 0),
                pool_function=T.max, mode='ignore_borders'):
    """Pool the last two axes of a 4D tensor using ``images2neibs``.

    :param input: symbolic tensor indexed as ``shape[0..3]``.
    :param ds: pooling window size for the last two axes.
    :param strides: step between windows; defaults to ``ds``.
    :param pad: padding added on both sides of the last two axes.  Padding
        is filled with ``-inf`` and is only allowed with ``T.max``.
    :param pool_function: reduction applied over each window (``axis=1`` of
        the neighbourhood matrix).
    :param mode: forwarded to ``images2neibs``.
    :return: tensor of shape ``(shape[0], shape[1], out_rows, out_cols)``.
    :raises RuntimeError: if a stride is larger than the matching window
        size.
    """
    if strides is None:
        strides = ds
    if strides[0] > ds[0] or strides[1] > ds[1]:
        # tuple(...) keeps the message formatting working for list inputs,
        # where ``strides + ds`` would otherwise be a list.
        sizes = tuple(strides) + tuple(ds)
        raise RuntimeError("strides should be smaller than or equal to ds,"
                           " strides=(%d, %d) and ds=(%d, %d)" % sizes)
    shape = input.shape
    # Tuple comparison so that a list such as [0, 0] also means "no pad".
    if tuple(pad) != (0, 0):
        assert pool_function is T.max
        pad_x = pad[0]
        pad_y = pad[1]
        # Fill with -inf so padded cells are never selected by the max.
        a = T.alloc(-numpy.inf, shape[0], shape[1],
                    shape[2] + pad_x * 2, shape[3] + pad_y * 2)
        input = T.set_subtensor(
            a[:, :, pad_x:pad_x + shape[2], pad_y:pad_y + shape[3]], input)
        shape = input.shape

    neibs = images2neibs(input, ds, strides, mode=mode)
    pooled_neibs = pool_function(neibs, axis=1)

    out_rows = (shape[2] - ds[0]) // strides[0] + 1
    out_cols = (shape[3] - ds[1]) // strides[1] + 1
    pooled_output = pooled_neibs.reshape(
        (shape[0], shape[1], out_rows, out_cols))
    return pooled_output
def im2col(inputX, fsize, stride, pad):
    """Extract square patches from a 4D array as rows of a matrix.

    A Theano function is compiled on every call: the symbolic input is
    padded with ``lasagnepad`` (``batch_ndim=2``) and then unrolled by
    ``images2neibs`` with ``fsize`` x ``fsize`` windows, a step of
    ``stride`` in both directions and mode ``'ignore_borders'``.

    :param inputX: concrete 4D array (``ndim`` must be 4).
    :param fsize: side length of the square window.
    :param stride: step between windows, same in both directions.
    :param pad: padding specification forwarded to ``lasagnepad``.
    :return: the neighbourhood matrix computed for ``inputX``.
    """
    assert inputX.ndim == 4
    images = T.tensor4()
    padded = lasagnepad(images, pad, batch_ndim=2)
    window = (fsize, fsize)
    step = (stride, stride)
    columns = images2neibs(padded, window, step, 'ignore_borders')
    extract = theano.function([images], columns, allow_input_downcast=True)
    return extract(inputX)
def dynamic_kmaxPooling(self, curConv_out, k):
    """k-max pool each row of the last axis, zero-padded to a fixed width.

    Every ``(1, width)`` row of ``curConv_out`` keeps its ``k`` largest
    values in their original left-to-right order.  The result is padded on
    the right with zeros up to ``self.unifiedWidth`` columns.

    Side effect: stores the flattened rows in ``self.neighbors``.

    :param curConv_out: 4D symbolic tensor.
    :param k: number of values kept per row.
    :return: 4D tensor with the first three dimensions of ``curConv_out``
        and a last dimension of ``self.unifiedWidth``.
    """
    rows = TSN.images2neibs(ten4=curConv_out,
                            neib_shape=(1, curConv_out.shape[3]),
                            mode='ignore_borders')
    self.neighbors = rows
    n_rows = rows.shape[0]

    # argsort is ascending, so the last k columns index the k largest
    # values; sorting those indices restores the original ordering.
    top_k_positions = T.sort(T.argsort(rows, axis=1)[:, -k:], axis=1)

    row_ids = T.repeat(T.arange(n_rows), k)
    col_ids = top_k_positions.flatten()
    top_k_flat = rows[row_ids, col_ids]

    matrix_shape = T.cast(
        T.join(0, T.as_tensor([n_rows]), T.as_tensor([k])), 'int64')
    top_k_matrix = T.reshape(top_k_flat, matrix_shape, ndim=2)

    zero_fill = T.zeros((n_rows, self.unifiedWidth - k),
                        dtype=theano.config.floatX)
    padded = T.concatenate([top_k_matrix, zero_fill], axis=1)

    # Recover the tensor form.
    out_shape = T.cast(
        T.join(0,
               curConv_out.shape[:-2],
               T.as_tensor([curConv_out.shape[2]]),
               T.as_tensor([self.unifiedWidth])),
        'int64')
    return T.reshape(padded, out_shape, ndim=4)
def __init__(self, input, input_shape=None):
    """Build a 3x3, stride-2 neighbourhood reduction over a padded input.

    :param input: either a ``Layer`` (its ``output`` is used, and its
        ``output_shape`` when ``input_shape`` is not given) or a symbolic
        tensor.
    :param input_shape: 4-tuple describing the input; the last two entries
        must be equal (square images only).
    """
    if isinstance(input, Layer):
        self.input = input.output
        if input_shape is None:
            input_shape = input.output_shape
    else:
        self.input = input
    self.input_shape = input_shape

    # Only square images are allowed.
    assert input_shape[2] == input_shape[3]

    # Extend by one pixel in each direction, filled with -INF.
    shapeext = (input_shape[0], input_shape[1],
                input_shape[2] + 2, input_shape[3] + 2)
    inputext = T.alloc(dtypeX(-INF), *shapeext)
    inputext = T.set_subtensor(
        inputext[:, :, 1:input_shape[2] + 1, 1:input_shape[3] + 1],
        self.input)

    # NOTE(review): border windows include the -INF padding, so their mean
    # is presumably -INF as well; confirm whether a max was intended.
    self.output = images2neibs(inputext, (3, 3), (2, 2),
                               'ignore_borders').mean(axis=-1)
    # Floor division keeps the shape integral even under true division.
    self.output_shape = (input_shape[0], input_shape[1],
                         (input_shape[2] + 1) // 2,
                         (input_shape[3] + 1) // 2)
def link(self, input):
    """Connect ``input`` and build the k-max pooling output.

    The input is unrolled with ``images2neibs`` using neighbourhoods that
    span the whole third axis (``(shape[2], 1)``).  For every such
    neighbourhood the ``self.k_max`` largest values are kept in their
    original order.

    :param input: symbolic tensor; stored in ``self.input``.
    :return: ``self.output``, the pooled tensor after swapping the last two
        axes back with ``dimshuffle(0, 1, 3, 2)``.
    """
    self.input = input
    in_shape = self.input.shape
    k = self.k_max

    # Select the lines on which k-max pooling is applied.
    columns = TSN.images2neibs(
        ten4=self.input,
        neib_shape=(in_shape[2], 1),
        mode='valid'
    )

    # Indices of the k largest entries, re-sorted into positional order.
    keep = T.sort(T.argsort(columns, axis=1)[:, -k:], axis=1)
    row_ids = T.repeat(T.arange(columns.shape[0]), k)
    picked = columns[row_ids, keep.flatten()]

    target_shape = T.join(0, in_shape[:-2], [in_shape[3]], [k])
    reshaped = picked.reshape(target_shape, ndim=self.input.ndim)
    self.output = reshaped.dimshuffle(0, 1, 3, 2)
    return self.output
def dynamic_kmaxPooling(self, curConv_out, k):
    """Keep the k largest values per row and pad to ``self.unifiedWidth``.

    Rows are the ``(1, width)`` slices of ``curConv_out``.  The selected
    values keep their original relative order; the remaining
    ``self.unifiedWidth - k`` columns are filled with zeros.

    Side effect: ``self.neighbors`` is set to the flattened row matrix.

    :param curConv_out: 4D symbolic tensor.
    :param k: number of values kept per row.
    :return: 4D tensor whose last dimension is ``self.unifiedWidth``.
    """
    flat_rows = TSN.images2neibs(ten4=curConv_out,
                                 neib_shape=(1, curConv_out.shape[3]),
                                 mode='ignore_borders')
    self.neighbors = flat_rows
    row_count = flat_rows.shape[0]

    ascending = T.argsort(flat_rows, axis=1)
    # Last k columns of an ascending argsort are the k largest entries.
    largest_k = ascending[:, -k:]
    # Sort the indices so values come out in their original positions.
    in_order = T.sort(largest_k, axis=1)

    selected = flat_rows[T.repeat(T.arange(row_count), k),
                         in_order.flatten()]
    two_d_shape = T.cast(
        T.join(0, T.as_tensor([row_count]), T.as_tensor([k])), 'int64')
    selected_matrix = T.reshape(selected, two_d_shape, ndim=2)

    pad_width = self.unifiedWidth - k
    zeros_right = T.zeros((row_count, pad_width),
                          dtype=theano.config.floatX)
    widened = T.concatenate([selected_matrix, zeros_right], axis=1)

    # Recover the tensor form.
    four_d_shape = T.cast(
        T.join(0,
               curConv_out.shape[:-2],
               T.as_tensor([curConv_out.shape[2]]),
               T.as_tensor([self.unifiedWidth])),
        'int64')
    return T.reshape(widened, four_d_shape, ndim=4)
def __init__(self, input, input_shape=None):
    """Build a 3x3, stride-2 neighbourhood reduction over a padded input.

    :param input: either a ``Layer`` (its ``output`` is used, this layer is
        registered in ``Layer.linkstruct[input]``, and its ``output_shape``
        is used when ``input_shape`` is not given) or a symbolic tensor.
    :param input_shape: 4-tuple describing the input; the last two entries
        must be equal (square images only).
    """
    if isinstance(input, Layer):
        self.input = input.output
        Layer.linkstruct[input].append(self)
        if input_shape is None:
            input_shape = input.output_shape
    else:
        self.input = input
    self.input_shape = input_shape

    # Only square images are allowed.
    assert input_shape[2] == input_shape[3]

    # Extend by one pixel in each direction, filled with -INF.
    shapeext = (input_shape[0], input_shape[1],
                input_shape[2] + 2, input_shape[3] + 2)
    inputext = CachedAlloc(dtypeX(-INF), *shapeext)
    inputext = T.set_subtensor(
        inputext[:, :, 1:input_shape[2] + 1, 1:input_shape[3] + 1],
        self.input)

    # Floor division: the shape is used by reshape below and must stay
    # integral even when "/" means true division.
    self.output_shape = (input_shape[0], input_shape[1],
                         (input_shape[2] + 1) // 2,
                         (input_shape[3] + 1) // 2)

    # NOTE(review): border windows include the -INF padding, so their mean
    # is presumably -INF as well; confirm whether a max was intended.
    self.output = images2neibs(inputext, (3, 3), (2, 2),
                               'ignore_borders').mean(axis=-1)
    self.output = T.patternbroadcast(
        self.output.reshape(self.output_shape), (False, ) * 4)
def Fold(self, conv_out, ds=(2, 1)):
    '''Fold the feature map by summing non-overlapping ``ds`` blocks.

    With the default ``ds=(2, 1)`` this folds the map in two by summing
    vertical neighbours.

    :param conv_out: 4D symbolic tensor.
    :param ds: block size ``(rows, cols)`` passed to ``images2neibs``.
    :return: tensor of shape
        ``(n, c, rows // ds[0], cols // ds[1])``.
    '''
    imgs = images2neibs(conv_out, T.as_tensor_variable(ds),
                        mode='ignore_borders')
    orig = conv_out.shape
    # The output shape follows ds instead of a hard-coded factor of two,
    # so a non-default ds no longer produces a mis-shaped result.
    shp = (orig[0], orig[1],
           T.cast(orig[2] // ds[0], 'int32'),
           orig[3] // ds[1])
    res = T.reshape(T.sum(imgs, axis=-1), shp)
    return res
def extract_image_patches(X, ksizes, strides, padding='valid',
                          data_format='channels_first'):
    """Extract patches from a batch of images.

    :param X: 4D input tensor.
    :param ksizes: 2-tuple ``(rows, cols)`` with the patch size.
    :param strides: 2-tuple ``(rows, cols)`` with the step between patches.
    :param padding: ``'valid'``, or ``'same'`` which is mapped to the
        ``images2neibs`` mode ``'ignore_borders'``.
    :param data_format: ``'channels_first'`` or ``'channels_last'``.
    :return: 6D tensor ``(batch, rows, cols, channels, k_rows, k_cols)``
        for ``channels_first`` and
        ``(batch, rows, cols, k_rows, k_cols, channels)`` for
        ``channels_last``.
    """
    patch_rows, patch_cols = ksizes[0], ksizes[1]
    if padding == 'same':
        padding = 'ignore_borders'
    if data_format == 'channels_last':
        X = KTH.permute_dimensions(X, [0, 3, 1, 2])
    # Thanks to https://github.com/awentzonline for the help!
    xs = KTH.shape(X)
    batch = xs[0]
    num_channels = xs[-3]
    # Each axis uses its own patch size and stride; the row count used to
    # be computed with the column stride.
    num_rows = 1 + (xs[-2] - patch_rows) // strides[0]
    num_cols = 1 + (xs[-1] - patch_cols) // strides[1]
    patches = images2neibs(X, ksizes, strides, padding)
    # Theano is sorting by channel
    patches = KTH.reshape(
        patches,
        (batch, num_channels, num_rows * num_cols, patch_rows, patch_cols))
    patches = KTH.permute_dimensions(patches, (0, 2, 1, 3, 4))
    # arrange in a 2d-grid (rows, cols, channels, px, py)
    patches = KTH.reshape(
        patches,
        (batch, num_rows, num_cols, num_channels, patch_rows, patch_cols))
    if data_format == 'channels_last':
        patches = KTH.permute_dimensions(patches, [0, 1, 2, 4, 5, 3])
    return patches
def preparePooling(self, input3D):
    """Flatten a 3D tensor into rows and argsort each row.

    The input is first reshaped to 4D with a leading dimension of 1, then
    unrolled with ``images2neibs`` using one full row (``(1, shape[2])``)
    per neighbourhood.

    :param input3D: 3D symbolic tensor.
    :return: ``(rows, order)`` where ``order`` is the ascending argsort of
        ``rows`` along axis 1.
    """
    dims = input3D.shape
    as_4d = input3D.reshape((1, dims[0], dims[1], dims[2]))
    rows = TSN.images2neibs(ten4=as_4d,
                            neib_shape=(1, dims[2]),
                            mode='ignore_borders')
    order = T.argsort(rows, axis=1)
    return rows, order
def preparePooling(self, conv_out):
    """Flatten a 4D tensor into rows and argsort each row.

    Each neighbourhood is one full row of the last axis
    (``(1, conv_out.shape[3])``).

    Side effect: the row matrix is stored in ``self.neighbors``.

    :param conv_out: 4D symbolic tensor.
    :return: ``(rows, order)`` where ``order`` is the ascending argsort of
        ``rows`` along axis 1.
    """
    row_shape = (1, conv_out.shape[3])
    rows = TSN.images2neibs(ten4=conv_out,
                            neib_shape=row_shape,
                            mode='ignore_borders')
    self.neighbors = rows
    order = T.argsort(rows, axis=1)
    return rows, order
def fold(conv):
    """Sum each pair of adjacent rows of the third axis.

    The tensor is flattened into one matrix row per ``(1, width)`` slice,
    consecutive rows are grouped in pairs and each pair is summed.

    :param conv: 4D symbolic tensor.
    :return: tensor of shape ``(n, c, rows // 2, width)``.
    """
    c_shape = conv.shape
    pool_size = (1, conv.shape[-1])
    neighbors_to_pool = TSN.images2neibs(ten4=conv,
                                         neib_shape=pool_size,
                                         mode='ignore_borders')
    n_shape = neighbors_to_pool.shape
    # Floor division keeps the symbolic shapes integral; "/" yields a float
    # shape when it means true division.
    paired = T.reshape(neighbors_to_pool,
                       (n_shape[0] // 2, 2, n_shape[-1]))
    summed = T.sum(paired, axis=1)
    folded_out = T.reshape(
        summed,
        (c_shape[0], c_shape[1], c_shape[2] // 2, c_shape[3]),
        ndim=4)
    return folded_out
def kmaxPool(self, conv_out, pool_shape, k):
    '''Perform k-max pooling over the last axis.

    :param conv_out: 4D symbolic tensor.
    :param pool_shape: ``(n0, n1, d, size)``; ``size`` is the row width
        used for the neighbourhoods and ``n0 * n1 * d`` the row count.
    :param k: number of values kept per row.
    :return: tensor of shape ``(n0, n1, d, k)`` holding, for every row, its
        ``k`` largest values in their original order.
    '''
    n0, n1, d, size = pool_shape
    rows = images2neibs(conv_out, T.as_tensor_variable((1, size)))
    # Negating the values makes the ascending argsort list the largest
    # entries first.
    order = T.argsort(T.mul(rows, -1))
    top_k = T.sort(order[:, :k])
    n_rows = d * n1 * n0
    row_ids = T.arange(n_rows).reshape((n_rows, 1))
    return rows[row_ids, top_k].reshape((n0, n1, d, k))
def __init__(self, conv_out, k=1):
    """Build a k-max pooling layer followed by ``tanh``.

    Neighbourhoods span the whole third axis of ``conv_out``
    (``(conv_out.shape[2], 1)``); in each one the ``k`` largest values are
    kept in their original order.

    :param conv_out: 4D symbolic tensor.
    :param k: number of values kept per neighbourhood.

    Sets ``self.output`` to ``tanh`` of the pooled tensor, whose shape is
    ``conv_out.shape[:-2] + (conv_out.shape[3], k)``.
    """
    # images2neibs produces a 2D matrix
    columns = TSN.images2neibs(ten4=conv_out,
                               neib_shape=(conv_out.shape[2], 1),
                               mode='ignore_borders')

    # Ascending argsort: the last k columns index the k largest values;
    # sorting the indices restores positional order.
    top_k_idx = T.sort(T.argsort(columns, axis=1)[:, -k:], axis=1)

    row_ids = T.repeat(T.arange(columns.shape[0]), k)
    col_ids = top_k_idx.flatten()
    top_k_flat = columns[row_ids, col_ids]

    # Reshape the flat selection back into a 4D tensor.
    out_shape = T.cast(
        T.join(0,
               conv_out.shape[:-2],
               T.as_tensor([conv_out.shape[3]]),
               T.as_tensor([k])),
        'int32')
    pooled = T.reshape(top_k_flat, out_shape, ndim=4)

    self.output = T.tanh(pooled)
def im2col(inputX, fsize, stride, pad):
    """Extract square patches from a 4D array as rows of a matrix.

    A Theano function is compiled on every call: the symbolic input is
    padded with ``lasagnepad`` (``batch_ndim=2``) and unrolled by
    ``images2neibs`` with ``fsize`` x ``fsize`` windows, a step of
    ``stride`` in both directions and mode ``'ignore_borders'``.

    :param inputX: concrete 4D array (``ndim`` must be 4).
    :param fsize: side length of the square window.
    :param stride: step between windows, same in both directions.
    :param pad: padding forwarded to ``lasagnepad``.  ``None`` pads only
        the bottom and right edges, by just enough that the last window
        reaches the border.
    :return: the neighbourhood matrix computed for ``inputX``.
    """
    assert inputX.ndim == 4
    Xrows, Xcols = inputX.shape[-2:]
    X = T.tensor4()
    if pad is None:
        # Keep the bottom and right borders.  Pad only when the windows do
        # not already fit exactly; otherwise a whole extra stride would be
        # added and an extra row/column of patches produced.
        rowrem = (Xrows - fsize) % stride
        colrem = (Xcols - fsize) % stride
        rowpad = stride - rowrem if rowrem else 0
        colpad = stride - colrem if colrem else 0
        pad = ((0, rowpad), (0, colpad))
    Xpad = lasagnepad(X, pad, batch_ndim=2)
    neibs = images2neibs(Xpad, (fsize, fsize), (stride, stride),
                         'ignore_borders')
    im2colfn = theano.function([X], neibs, allow_input_downcast=True)
    return im2colfn(inputX)
def fold(conv):
    """Sum each pair of adjacent rows of the third axis.

    The tensor is flattened into one matrix row per ``(1, width)`` slice,
    consecutive rows are grouped in pairs and each pair is summed.

    :param conv: 4D symbolic tensor.
    :return: tensor of shape ``(n, c, rows // 2, width)``.
    """
    c_shape = conv.shape
    row_window = (1, conv.shape[-1])
    rows = TSN.images2neibs(ten4=conv,
                            neib_shape=row_window,
                            mode='ignore_borders')
    n_shape = rows.shape
    # "//" instead of "/": shapes must stay integers, and "/" produces a
    # float when it means true division.
    paired = T.reshape(rows, (n_shape[0] // 2, 2, n_shape[-1]))
    summed = T.sum(paired, axis=1)
    return T.reshape(
        summed,
        (c_shape[0], c_shape[1], c_shape[2] // 2, c_shape[3]),
        ndim=4)
def k_max_pool(conv, k):
    """Keep the ``k`` largest values of every row of the last axis.

    The selected values keep their original left-to-right order.

    :param conv: 4D symbolic tensor.
    :param k: number of values kept per row.
    :return: tensor of shape ``(n, c, rows, k)``.
    """
    dims = conv.shape
    rows = TSN.images2neibs(ten4=conv,
                            neib_shape=(1, conv.shape[-1]),
                            mode='ignore_borders')

    # Ascending argsort: the last k columns index the k largest values.
    largest = T.argsort(rows, axis=1)[:, -k:]
    # Sorting the indices puts the kept values back in positional order.
    ordered = T.sort(largest, axis=1)

    row_ids = T.repeat(T.arange(rows.shape[0], dtype='int32'), k)
    col_ids = ordered.flatten()
    kept = rows[row_ids, col_ids]

    return T.reshape(kept, (dims[0], dims[1], dims[2], k), ndim=4)
def cifar10neighbs(topo, patch_shape):
    """Extract all patches and place the three channel groups side by side.

    The last axis of ``topo`` is moved to the front before
    ``images2neibs`` is applied with a step of ``(1, 1)``.  The resulting
    rows are split into three equal consecutive groups, which are
    concatenated along axis 1.

    :param topo: 4D array or tensor; presumably laid out as
        (batch, rows, cols, channels) with three channels -- confirm with
        callers.
    :param patch_shape: ``(rows, cols)`` of each patch.
    :return: matrix with one row per patch and three times the patch size
        in columns.
    """
    assert topo.ndim == 4
    r, c = patch_shape
    topo = as_tensor_variable(topo)
    flat = images2neibs(ten4=topo.dimshuffle(3, 0, 1, 2),
                        neib_shape=(r, c),
                        neib_step=(1, 1))
    # Floor division: m is used as a slice bound and must be an integer.
    m = flat.shape[0] // 3
    red = flat[0:m, :]
    green = flat[m:2 * m, :]
    blue = flat[2 * m:, :]
    rval = T.concatenate((red, green, blue), axis=1)
    return rval
def cifar10neighbs(topo, patch_shape):
    """Extract all patches and place the three channel groups side by side.

    The last axis of ``topo`` is moved to the front before
    ``images2neibs`` is applied with a step of ``(1, 1)``.  The resulting
    rows are split into three equal consecutive groups, which are
    concatenated along axis 1.

    :param topo: 4D array or tensor; presumably laid out as
        (batch, rows, cols, channels) with three channels -- confirm with
        callers.
    :param patch_shape: ``(rows, cols)`` of each patch.
    :return: matrix with one row per patch and three times the patch size
        in columns.
    """
    assert topo.ndim == 4
    r, c = patch_shape
    topo = as_tensor_variable(topo)
    channel_first = topo.dimshuffle(3, 0, 1, 2)
    flat = images2neibs(ten4=channel_first,
                        neib_shape=(r, c),
                        neib_step=(1, 1))
    # Integer row count per group; "/" would give a float slice bound when
    # it means true division.
    m = flat.shape[0] // 3
    red = flat[0:m, :]
    green = flat[m:2 * m, :]
    blue = flat[2 * m:, :]
    return T.concatenate((red, green, blue), axis=1)
def extract_image_patches(X, ksizes, strides, padding="valid",
                          data_format="channels_first"):
    """
    Extract the patches from an image

    Parameters
    ----------
    X : The input image
    ksizes : 2-d tuple (rows, cols) with the kernel size
    strides : 2-d tuple (rows, cols) with the strides size
    padding : 'same' or 'valid'; 'same' is mapped to the images2neibs
        mode 'ignore_borders'
    data_format : 'channels_last' or 'channels_first'

    Returns
    -------
    The (k_w,k_h) patches extracted
    TF ==> (batch_size,w,h,k_w,k_h,c)
    TH ==> (batch_size,w,h,c,k_w,k_h)
    """
    patch_rows, patch_cols = ksizes[0], ksizes[1]
    if padding == "same":
        padding = "ignore_borders"
    if data_format == "channels_last":
        X = KTH.permute_dimensions(X, [0, 3, 1, 2])
    # Thanks to https://github.com/awentzonline for the help!
    xs = KTH.shape(X)
    batch = xs[0]
    num_channels = xs[-3]
    # Rows use the row kernel size and row stride; previously the column
    # stride and column kernel size were used for both axes.
    num_rows = 1 + (xs[-2] - patch_rows) // strides[0]
    num_cols = 1 + (xs[-1] - patch_cols) // strides[1]
    patches = images2neibs(X, ksizes, strides, padding)
    # Theano is sorting by channel
    new_shape = (batch, num_channels, num_rows * num_cols,
                 patch_rows, patch_cols)
    patches = KTH.reshape(patches, new_shape)
    patches = KTH.permute_dimensions(patches, (0, 2, 1, 3, 4))
    # arrange in a 2d-grid (rows, cols, channels, px, py)
    new_shape = (batch, num_rows, num_cols, num_channels,
                 patch_rows, patch_cols)
    patches = KTH.reshape(patches, new_shape)
    if data_format == "channels_last":
        patches = KTH.permute_dimensions(patches, [0, 1, 2, 4, 5, 3])
    return patches
def extract_image_patches(X, ksizes, strides, border_mode="valid",
                          dim_ordering="th"):
    '''
    Extract the patches from an image

    Parameters
    ----------
    X : The input image
    ksizes : 2-d tuple (rows, cols) with the kernel size
    strides : 2-d tuple (rows, cols) with the strides size
    border_mode : 'same' or 'valid'; 'same' is mapped to the images2neibs
        mode 'ignore_borders'
    dim_ordering : 'tf' or 'th'

    Returns
    -------
    The (k_w,k_h) patches extracted
    TF ==> (batch_size,w,h,k_w,k_h,c)
    TH ==> (batch_size,w,h,c,k_w,k_h)
    '''
    patch_rows, patch_cols = ksizes[0], ksizes[1]
    if border_mode == "same":
        border_mode = "ignore_borders"
    if dim_ordering == "tf":
        X = KTH.permute_dimensions(X, [0, 3, 1, 2])
    # Thanks to https://github.com/awentzonline for the help!
    xs = KTH.shape(X)
    batch = xs[0]
    num_channels = xs[-3]
    # Rows use the row kernel size and row stride; previously the column
    # stride and column kernel size were used for both axes.
    num_rows = 1 + (xs[-2] - patch_rows) // strides[0]
    num_cols = 1 + (xs[-1] - patch_cols) // strides[1]
    patches = images2neibs(X, ksizes, strides, border_mode)
    # Theano is sorting by channel.  The per-image patch count is
    # num_rows * num_cols; dividing the total row count by the channel
    # count was only correct for a batch of one.
    patches = KTH.reshape(
        patches,
        (batch, num_channels, num_rows * num_cols, patch_rows, patch_cols))
    patches = KTH.permute_dimensions(patches, (0, 2, 1, 3, 4))
    # arrange in a 2d-grid (rows, cols, channels, px, py)
    patches = KTH.reshape(
        patches,
        (batch, num_rows, num_cols, num_channels, patch_rows, patch_cols))
    if dim_ordering == "tf":
        patches = KTH.permute_dimensions(patches, [0, 1, 2, 4, 5, 3])
    return patches
def kmaxPooling(self, fold_out, k):
    """Keep the ``k`` largest values of every row of the last axis.

    The kept values stay in their original left-to-right order.

    Side effect: the flattened row matrix is stored in ``self.neighbors``.

    :param fold_out: 4D symbolic tensor.
    :param k: number of values kept per row.
    :return: 4D tensor with the first three dimensions of ``fold_out`` and
        a last dimension of ``k``.
    """
    rows = TSN.images2neibs(ten4=fold_out,
                            neib_shape=(1, fold_out.shape[3]),
                            mode='ignore_borders')
    self.neighbors = rows

    # Ascending argsort: the last k columns index the k largest values;
    # sorting those indices restores positional order.
    keep = T.sort(T.argsort(rows, axis=1)[:, -k:], axis=1)

    row_ids = T.repeat(T.arange(rows.shape[0]), k)
    col_ids = keep.flatten()
    kept = rows[row_ids, col_ids]

    out_shape = T.cast(
        T.join(0,
               fold_out.shape[:-2],
               T.as_tensor([fold_out.shape[2]]),
               T.as_tensor([k])),
        'int64')
    return T.reshape(kept, out_shape, ndim=4)
def im2col_compfn(shape, fsize, stride, pad, ignore_border=False):
    """Compile and return a Theano patch-extraction function.

    :param shape: ``(rows, cols)`` of the images the function will see.
    :param fsize: window size; a scalar is expanded to a square window.
    :param stride: step between windows; a scalar is used for both axes.
    :param pad: integer padding applied on every side.
    :param ignore_border: when false, extra padding is added at the bottom
        and right so that the last window reaches the border.
    :return: compiled function mapping a 4D array to the neighbourhood
        matrix produced by ``images2neibs`` (mode ``'ignore_borders'``).
    """
    assert len(shape) == 2
    assert isinstance(pad, int)
    if isinstance(fsize, (int, float)):
        fsize = (int(fsize), int(fsize))
    if isinstance(stride, (int, float)):
        stride = (int(stride), int(stride))

    images = T.tensor4()

    if not ignore_border:
        # Keep the bottom and right borders.
        n_rows, n_cols = shape
        n_rows = n_rows + 2 * pad
        n_cols = n_cols + 2 * pad
        row_rem = (n_rows - fsize[0]) % stride[0]
        col_rem = (n_cols - fsize[1]) % stride[1]
        # Pad only as much as needed to complete the last partial window.
        extra_rows = stride[0] - row_rem if row_rem else 0
        extra_cols = stride[1] - col_rem if col_rem else 0
        pad = ((pad, pad + extra_rows), (pad, pad + extra_cols))

    padded = lasagnepad(images, pad, batch_ndim=2)
    columns = images2neibs(padded, fsize, stride, 'ignore_borders')
    return theano.function([images], columns, allow_input_downcast=True)
def pool_2d_i2n(input, ds=(2, 2), strides=None, pool_function=T.max,
                mode='ignore_borders'):
    """Pool the last two axes of a 4D tensor using ``images2neibs``.

    :param input: symbolic tensor indexed as ``shape[0..3]``.
    :param ds: pooling window size for the last two axes.
    :param strides: step between windows; defaults to ``ds``.
    :param pool_function: reduction applied over each window (``axis=1`` of
        the neighbourhood matrix).
    :param mode: forwarded to ``images2neibs``.
    :return: tensor of shape ``(shape[0], shape[1], out_rows, out_cols)``.
    :raises RuntimeError: if a stride is larger than the matching window
        size.
    """
    if strides is None:
        strides = ds
    if strides[0] > ds[0] or strides[1] > ds[1]:
        # Convert to tuples first: with list arguments ``strides + ds`` is a
        # list and the %-formatting itself would raise a TypeError.
        raise RuntimeError(
            "strides should be smaller than or equal to ds,"
            " strides=(%d, %d) and ds=(%d, %d)" %
            (tuple(strides) + tuple(ds)))
    shape = input.shape
    neibs = images2neibs(input, ds, strides, mode=mode)
    pooled_neibs = pool_function(neibs, axis=1)

    out_rows = (shape[2] - ds[0]) // strides[0] + 1
    out_cols = (shape[3] - ds[1]) // strides[1] + 1
    return pooled_neibs.reshape((shape[0], shape[1], out_rows, out_cols))
def kmaxPooling(self, fold_out, k):
    """k-max pool every row of the last axis of ``fold_out``.

    For each ``(1, width)`` row the ``k`` largest values are kept, in the
    order in which they appear in the row.

    Side effect: ``self.neighbors`` is set to the flattened row matrix.

    :param fold_out: 4D symbolic tensor.
    :param k: number of values kept per row.
    :return: 4D tensor whose last dimension is ``k``.
    """
    flat_rows = TSN.images2neibs(ten4=fold_out,
                                 neib_shape=(1, fold_out.shape[3]),
                                 mode='ignore_borders')
    self.neighbors = flat_rows

    ascending = T.argsort(flat_rows, axis=1)
    # The k right-most columns of an ascending argsort are the k largest.
    largest_k = ascending[:, -k:]
    # Sort the indices so the values come out in positional order.
    in_order = T.sort(largest_k, axis=1)

    selected = flat_rows[T.repeat(T.arange(flat_rows.shape[0]), k),
                         in_order.flatten()]

    target = T.cast(
        T.join(0,
               fold_out.shape[:-2],
               T.as_tensor([fold_out.shape[2]]),
               T.as_tensor([k])),
        'int64')
    return T.reshape(selected, target, ndim=4)
def __init__(self, input, input_shape=None):
    """Build a 3x3, stride-2 neighbourhood reduction over a padded input.

    :param input: either a ``Layer`` (its ``output`` is used, this layer is
        registered in ``Layer.linkstruct[input]``, and its ``output_shape``
        is used when ``input_shape`` is not given) or a symbolic tensor.
    :param input_shape: 4-tuple describing the input; the last two entries
        must be equal (square images only).
    """
    if isinstance(input, Layer):
        self.input = input.output
        Layer.linkstruct[input].append(self)
        if input_shape is None:
            input_shape = input.output_shape
    else:
        self.input = input
    self.input_shape = input_shape

    # Only square images are allowed.
    assert input_shape[2] == input_shape[3]

    # Extend by one pixel in each direction, filled with -INF.
    shapeext = (input_shape[0], input_shape[1],
                input_shape[2] + 2, input_shape[3] + 2)
    inputext = CachedAlloc(dtypeX(-INF), *shapeext)
    inputext = T.set_subtensor(
        inputext[:, :, 1:input_shape[2] + 1, 1:input_shape[3] + 1],
        self.input)

    # Floor division: the shape feeds reshape below and must stay integral
    # even when "/" means true division.
    self.output_shape = (input_shape[0], input_shape[1],
                         (input_shape[2] + 1) // 2,
                         (input_shape[3] + 1) // 2)

    # NOTE(review): border windows include the -INF padding, so their mean
    # is presumably -INF as well; confirm whether a max was intended.
    self.output = images2neibs(inputext, (3, 3), (2, 2),
                               'ignore_borders').mean(axis=-1)
    self.output = T.patternbroadcast(
        self.output.reshape(self.output_shape), (False,) * 4)
def __init__(self, input, input_shape=None):
    """Build a 3x3, stride-2 neighbourhood reduction over a padded input.

    :param input: either a ``Layer`` (its ``output`` is used, and its
        ``output_shape`` when ``input_shape`` is not given) or a symbolic
        tensor.
    :param input_shape: 4-tuple describing the input.
    """
    if isinstance(input, Layer):
        self.input = input.output
        if input_shape is None:
            input_shape = input.output_shape
    else:
        self.input = input
    self.input_shape = input_shape

    # The square-image restriction is intentionally disabled here:
    # assert input_shape[2] == input_shape[3]

    # Extend by one pixel in each direction, filled with -INF.
    shapeext = (input_shape[0], input_shape[1],
                input_shape[2] + 2, input_shape[3] + 2)
    inputext = T.alloc(dtypeX(-INF), *shapeext)
    inputext = T.set_subtensor(
        inputext[:, :, 1:input_shape[2] + 1, 1:input_shape[3] + 1],
        self.input)

    # Floor division: the shape feeds reshape below and must stay integral
    # even when "/" means true division.
    self.output_shape = (input_shape[0], input_shape[1],
                         (input_shape[2] + 1) // 2,
                         (input_shape[3] + 1) // 2)

    # NOTE(review): border windows include the -INF padding, so their mean
    # is presumably -INF as well; confirm whether a max was intended.
    self.output = images2neibs(inputext, (3, 3), (2, 2),
                               'ignore_borders').mean(axis=-1)
    self.output = self.output.reshape(self.output_shape)
__author__ = 'darshanhegde'
""" k-max pooling example. """

import numpy as np
import theano
from theano import tensor as T
from theano.sandbox import neighbours

# Number of largest values kept per row.
k = 3

# instantiate 4D tensor for input
input = T.tensor4(name='input')

# Unroll the input into rows of 1x5 neighbourhoods.
neighborsForPooling = neighbours.images2neibs(input, (1, 5), mode='valid')

# Ascending argsort: the last k columns index the k largest values of each
# row.
neighborsArgSorted = T.argsort(neighborsForPooling, axis=1)
kNeighborsArg = neighborsArgSorted[:, -k:]
# Sort the kept indices so the values come out in their original order.
kNeighborsArgSorted = T.sort(kNeighborsArg, axis=1)

# Row/column index pairs used to gather the selected values.
ii = T.repeat(T.arange(neighborsForPooling.shape[0]), k)
jj = kNeighborsArgSorted.flatten()
# The row count of 3 is hard-coded to match the example array below.
k_pooled_2D = neighborsForPooling[ii, jj].reshape((3, k))

# Turn the (3, k) matrix back into a 4D tensor of shape (1, 3, 1, 3).
k_pooled = neighbours.neibs2images(k_pooled_2D, (1, 3), (1, 3, 1, 3))

k_max = theano.function([input], k_pooled)

# NOTE: from here on ``input`` is rebound from the symbolic variable to a
# concrete numpy array.
input = np.array([[2, 4, 1, 6, 8], [12, 3, 5, 7, 1], [-8, 6, -12, 4, 1]], dtype=np.float32)
input = input.reshape(1, 3, 1, 5)
print "input shape: ", input.shape
print "input: ", input
# The pooled result is computed but not printed.
output = k_max(input)
def encoder(image, centroids, W, M, fo=None, chunk_size=999, batch_size=333, Y=None):
    '''
    Encode images as pooled distances between their patches and centroids.

    input:
        image     : N x D (N, n_channels*HSize*WSize)
        centroids : K x d (K, d=n_channels*psize*psize)
        W         : d x d, matrix applied to the mean-shifted patches
        M         : 1 x d, row subtracted from the patches before W
        fo        : optional object with ``create_dataset``; when given the
                    features are written to dataset "X" (and Y to "y")
                    instead of being returned
        chunk_size: number of images held in the shared variable at once
        batch_size: number of images processed per compiled-function call;
                    must divide chunk_size exactly
        Y         : optional data stored as dataset "y" when fo is given
    output:
        features: N x 4*K when fo is None; otherwise nothing is returned.

    NOTE(review): this is Python 2 code -- it uses xrange, print statements
    and "/" on integers.
    '''
    N = image.shape[0]
    # chunk to allocate on GPU
    n_chunks = np.int(np.ceil(1.0*N/chunk_size))
    # batch to process in a single pass
    n_batches = chunk_size/batch_size
    assert(n_batches*batch_size == chunk_size)
    n_channels = 3
    # Images are square: recover the side length from the flattened size.
    imSize = np.int(np.sqrt(image.shape[1]/n_channels))
    assert(np.square(imSize)*3 == image.shape[1])
    # image = image.reshape(-1, n_channels, imSize, imSize)
    #
    # #pad with zeros
    # pad = np.zeros((chunk_size*n_chunks-N, image.shape[1], image.shape[2],
    # image.shape[3]), dtype=image.dtype)
    # image = np.vstack([image, pad])
    num_centroids = centroids.shape[0]
    # Patches are square as well: recover their side from the centroid size.
    psize = np.int(np.sqrt(centroids.shape[1]/n_channels))
    assert(np.square(psize)*n_channels == centroids.shape[1])
    # Size of the grid of patch positions per image.
    h = imSize - psize + 1
    w = imSize - psize + 1
    if fo is not None:
        X_dataset = fo.create_dataset("X", (N, 4*num_centroids), dtype=config.floatX)
        if Y is not None:
            fo.create_dataset("y", data=Y)
    # Load the first chunk; later chunks are swapped in inside the main loop.
    img = np.float32(image[:chunk_size])
    #pad with zeros
    if img.shape[0]<chunk_size:
        pad = np.zeros((chunk_size-img.shape[0], img.shape[1]), dtype=img.dtype)
        img = np.vstack([img, pad])
    X = shared(img.reshape(-1, n_channels, imSize, imSize), borrow=True)
    C = shared(np.float32(centroids), borrow=True)
    W = shared(np.float32(W), borrow=True)
    M = shared(np.float32(M), borrow=True, broadcastable=(True, False))
    # Squared norm of every centroid.
    cc = T.square(C).sum(axis=1, keepdims=True).T # 1 x K
    im = T.tensor4(dtype=config.floatX)
    # Row-reversed identity reshaped into psize*psize filters of size
    # psize x psize; presumably convolving with them copies out each patch
    # pixel as its own output map -- confirm against conv.conv2d semantics.
    eyef = T.eye(psize * psize, psize * psize, dtype=config.floatX)[::-1]
    filts = T.reshape(eyef, (psize * psize, psize, psize))
    filts = T.shape_padleft(filts).dimshuffle((1, 0, 2, 3))
    res = T.zeros((n_channels, batch_size, psize * psize, h, w), dtype=config.floatX)
    # Run the convolution separately for each input channel.
    for i in xrange(n_channels):
        cur_slice = T.shape_padleft(im[:, i, :, :]).dimshuffle((1, 0, 2, 3))
        res = T.set_subtensor(res[i], conv.conv2d(cur_slice, filts))
    # res ~ (channel, batch, conv, hi, wi) -> (batch, hi, wi, channel, conv)
    # -> (batch, hi*wi, channel*h*w)
    res = res.dimshuffle((1, 3, 4, 0, 2)).\
        reshape((batch_size*h*w, n_channels*psize*psize))
    # Normalize the brightness and contrast separately for each patch.
    epsilon = 10
    mean_ = T.cast(res.mean(axis=1, keepdims=True), config.floatX)
    dof = n_channels*psize*psize # adjust DOF
    var_ = T.cast(res.var(axis=1, keepdims=True), config.floatX)*dof/(dof-1)
    res = (res-mean_)/T.sqrt(var_+epsilon)
    # Whitening
    res = T.dot(res-M, W) # batch*h*w x n_channels*psize*psize
    #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # normalise to unit length
    #res /= T.sqrt(T.sqr(res).sum(axis=1, keepdims=True))
    # Euclidean distance to every centroid via |c|^2 - 2 c.x + |x|^2.
    cx = T.dot(res, C.T) # batch*h*w x K
    res = T.sqr(res).sum(axis=1, keepdims=True) # batch*h*w x 1
    distance = cc - 2*cx + res # batch*h*w x K
    distance *= (distance > 0) # precision issue
    distance = T.sqrt(distance)
    # Activation: how much closer than the mean distance a centroid is.
    batch = distance.mean(axis=1, keepdims=True) - distance
    batch = batch.reshape((batch_size, h, w, num_centroids)).\
        dimshuffle(0, 3, 1, 2) # batch x K x h x w
    batch *= (batch > 0) # ReLU
    # Split the h x w map into 2x2 quadrants; for odd h one extra row and
    # column of zeros is appended so the map divides evenly.
    if np.int(h/2)*2 == h:
        half = np.int(h/2)
        padded = batch
    else:
        half = np.int((h+1)/2)
        padded = T.zeros((batch_size, num_centroids, h+1, w+1))
        padded = T.set_subtensor(padded[:, :, :h, :w], batch)
    pool_sum = TSN.images2neibs(padded, (half, half)) # batch*K*4 x h*w/4
    # Sum within each quadrant and flatten to one feature row per image.
    pool_out = pool_sum.sum(axis=-1).\
        reshape((batch_size, num_centroids, 2, 2)).dimshuffle(0, 3, 2, 1).\
        reshape((batch_size, 4*num_centroids)) # batch x 4*K
    # The compiled function takes a batch index into the shared chunk X.
    index = T.iscalar()
    encode = function(inputs=[index], outputs=pool_out,
                      givens={im: X[(index*batch_size):((index+1)*batch_size)]})
    # Main loop
    t0 = time.time()
    features = []
    for k in xrange(n_chunks):
        start = chunk_size*k
        stop = chunk_size*(k+1)
        # The first chunk is already in X; load the others on demand.
        if k > 0:
            img = np.float32(image[start:stop])
            #pad with zeros
            if img.shape[0]<chunk_size:
                pad = np.zeros((chunk_size-img.shape[0], img.shape[1]), dtype=img.dtype)
                img = np.vstack([img, pad])
            X.set_value(img.reshape(-1, n_channels, imSize, imSize), borrow=True)
        features_chunk = np.vstack([encode(i) for i in xrange(n_batches)])
        if fo is None:
            features.append(features_chunk)
        else:
            # dump to file
            # The last chunk may be zero-padded: write only the real rows.
            if k == n_chunks-1 and N != n_chunks*chunk_size:
                X_dataset[start:N] = features_chunk[:np.mod(N, chunk_size)]
            else:
                X_dataset[start:stop] = features_chunk
        print 'Encoder: chunk %d/%d' % (k+1, n_chunks)
    t1 = time.time()
    print 'Elapsed %d seconds' % (t1 - t0)
    if fo is None:
        # Drop the rows that came from zero padding.
        return np.vstack(features)[:N]
def __init__(self, potentialWidth, potentialHeight, columnsWidth, columnsHeight, inputWidth, inputHeight, centerPotSynapses, connectedPerm, minOverlap, wrapInput):
    """Store the overlap parameters and compile the Theano helper functions.

    :param potentialWidth: width of each column's potential-synapse pool.
    :param potentialHeight: height of each column's potential-synapse pool.
    :param columnsWidth: number of columns along the width.
    :param columnsHeight: number of columns along the height.
    :param inputWidth: width of the input grid.
    :param inputHeight: height of the input grid.
    :param centerPotSynapses: whether the potential synapses are centered
        over the columns.
    :param connectedPerm: threshold compared against the permanence input
        of ``getConnectedSynInput``.
    :param minOverlap: threshold used by ``checkMinOverlap``.
    :param wrapInput: use the wrapping neighbourhood function instead of
        padding; requires odd ``potentialHeight`` and ``potentialWidth``.
    """
    # Overlap Parameters
    ###########################################
    # Specifies if the potential synapses are centered
    # over the columns
    self.centerPotSynapses = centerPotSynapses
    # Use a wrap input function instead of padding the input
    # to calculate the overlap scores.
    self.wrapInput = wrapInput
    self.potentialWidth = potentialWidth
    self.potentialHeight = potentialHeight
    self.connectedPermParam = connectedPerm
    self.minOverlap = minOverlap
    self.inputWidth = inputWidth
    self.inputHeight = inputHeight
    # Calculate how many columns are expected from these
    # parameters.
    self.columnsWidth = columnsWidth
    self.columnsHeight = columnsHeight
    self.numColumns = columnsWidth * columnsHeight
    # Store the potential inputs to every column.
    # Each row represents the inputs a column's potential synapses cover.
    self.colInputPotSyn = None
    # Store the potential overlap values for every column
    self.colPotOverlaps = None
    # StepX and StepY describe how far each
    # column's potential synapses differ from the adjacent
    # columns in the X and Y directions. These parameters can't
    # change as theano uses them to set up functions.
    self.stepX, self.stepY = self.getStepSizes(inputWidth, inputHeight,
                                               self.columnsWidth, self.columnsHeight,
                                               self.potentialWidth, self.potentialHeight)
    # Construct a tiebreaker matrix for the columns' potential synapses.
    # It contains small values that help resolve any ties in potential
    # overlap scores for columns.
    self.potSynTieBreaker = np.array([[0.0 for i in range(self.potentialHeight*self.potentialWidth)]
                                      for j in range(self.numColumns)])
    #import ipdb; ipdb.set_trace()
    # The zero matrix is handed to the helper, which presumably fills it in
    # place -- confirm in makePotSynTieBreaker.
    self.makePotSynTieBreaker(self.potSynTieBreaker)
    # Store the potential inputs to every column plus the tie breaker value.
    # Each row represents the inputs a column's potential synapses cover.
    self.colInputPotSynTie = np.array([[0.0 for i in range(self.potentialHeight*self.potentialWidth)]
                                       for j in range(self.numColumns)])
    self.colTieBreaker = np.array([0.0 for i in range(self.numColumns)])
    self.makeColTieBreaker(self.colTieBreaker)

    # Create theano variables and functions
    ############################################

    # Create the theano function for calculating
    # the elementwise multiplication of 2 matrices.
    self.i_grid = T.matrix(dtype='float32')
    self.j_grid = T.matrix(dtype='float32')
    self.multi_vals = self.i_grid * self.j_grid
    self.multi_grids = function([self.i_grid, self.j_grid],
                                self.multi_vals,
                                on_unused_input='warn',
                                allow_input_downcast=True)

    # Create the theano function for calculating
    # the addition of a small tie breaker value to each matrix input.
    self.o_grid = T.matrix(dtype='float32')
    self.tie_grid = T.matrix(dtype='float32')
    self.add_vals = T.add(self.o_grid, self.tie_grid)
    self.add_tieBreaker = function([self.o_grid, self.tie_grid],
                                   self.add_vals,
                                   on_unused_input='warn',
                                   allow_input_downcast=True)

    # Create the theano function for calculating
    # the addition of a small tie breaker value to each vector input.
    self.o_vect = T.vector(dtype='float32')
    self.tie_vect = T.vector(dtype='float32')
    self.add_vectVals = T.add(self.o_vect, self.tie_vect)
    self.add_vectTieBreaker = function([self.o_vect, self.tie_vect],
                                       self.add_vectVals,
                                       on_unused_input='warn',
                                       allow_input_downcast=True)

    # Create the theano function for calculating
    # the inputs to a column from an input grid.
    self.kernalSize = (potentialHeight, potentialWidth)
    # poolstep is how far to move the kernel in each direction.
    self.poolstep = (self.stepY, self.stepX)
    # Create the theano function for calculating the input to each column
    self.neib_shape = T.as_tensor_variable(self.kernalSize)
    self.neib_step = T.as_tensor_variable(self.poolstep)
    self.pool_inp = T.tensor4('pool_input', dtype='float32')
    self.pool_convole = images2neibs(self.pool_inp, self.neib_shape, self.neib_step, mode='valid')
    self.pool_inputs = function([self.pool_inp],
                                self.pool_convole,
                                on_unused_input='warn',
                                allow_input_downcast=True)

    # Create the theano function for calculating
    # the inputs to a column from an input grid.
    # Uses a wrapping function when calculating the neighbourhoods.
    # The kernel size must be an odd shape for the wrapping function to work.
    # When wrapping make sure the potential pool shapes are smaller than the total inputs.
    # This is a restriction imposed by theano's wrapping function. Also keep the potential pool shape odd.
    if self.wrapInput == True:
        if potentialHeight % 2 != 1:
            print "WARNING: The columns potential height is not odd = %s" %self.potentialHeight
            print " The overlap calculators wrapping function requires odd pooling shapes smaller then the input."
        if potentialWidth % 2 != 1:
            print "WARNING: The columns potential width is not odd = %s" %self.potentialWidth
            print " The overlap calculators wrapping function requires odd pooling shapes smaller then the input."
        # After warning, refuse even sizes outright.
        assert potentialHeight % 2 == 1
        assert potentialWidth % 2 == 1
    #import ipdb; ipdb.set_trace()
    # The wrapped function is built in both cases; without wrapping it gets
    # a (0, 0) kernel.
    if self.wrapInput == True:
        self.kernalSize_wrap = (self.potentialHeight, self.potentialWidth)
    else:
        self.kernalSize_wrap = (0,0)
    # poolstep is how far to move the kernel in each direction.
    self.poolstep_wrap = (self.stepY, self.stepX)
    # Create the theano function for calculating the input to each column
    self.neib_shape_wrap = T.as_tensor_variable(self.kernalSize_wrap)
    self.neib_step_wrap = T.as_tensor_variable(self.poolstep_wrap)
    self.pool_inp_wrap = T.tensor4('pool_input_wrap', dtype='float32')
    self.pool_convole_wrap = images2neibs(self.pool_inp_wrap,
                                          self.neib_shape_wrap,
                                          self.neib_step_wrap,
                                          mode='wrap_centered')
    self.pool_inputs_wrap = function([self.pool_inp_wrap],
                                     self.pool_convole_wrap,
                                     on_unused_input='warn',
                                     allow_input_downcast=True)

    # Create the theano function for calculating
    # which synapses are connected.
    self.j = T.matrix('poolConnInput', dtype='float32')
    self.k = T.matrix('synInputVal', dtype='float32')
    # Declared here but not used by any function compiled in this method.
    self.connectedPermanence = T.matrix('con_perm', dtype='float32')
    # Compare the input matrix j to the scalar parameter.
    # If the matrix value is not greater than connectedPermParam
    # return zero, otherwise return the matching value of k.
    self.checkConn = T.switch(T.lt(self.connectedPermParam, self.j), self.k, 0.0)
    # Use enable downcast so the numpy arrays of float 64 can be downcast to float32
    self.getConnectedSynInput = function([self.j, self.k],
                                         self.checkConn,
                                         mode=Mode(linker='vm'),
                                         allow_input_downcast=True)

    # Create the theano function for calculating
    # the overlap of each col (row sums of the input matrix).
    self.b = T.matrix(dtype='float32')
    self.m = self.b.sum(axis=1)
    self.calcOverlap = function([self.b], self.m, allow_input_downcast=True)

    # Create the theano function for calculating
    # if an overlap value is greater than or equal to minOverlap.
    # If not then set to zero.
    self.currOverlap = T.vector(dtype='float32')
    self.ch_over = T.switch(T.ge(self.currOverlap, self.minOverlap), self.currOverlap, 0.0)
    self.checkMinOverlap = function([self.currOverlap],
                                    self.ch_over,
                                    allow_input_downcast=True)

    # Create the theano function for calculating
    # the x and y indices from an input element index.
    # The input matrix contains a number representing a potential
    # synapse and the position that the synapse connects to in the input
    # grid. Convert this into a col, row index and output it into 2 matrices,
    # one for the row number the second for the column number.
    # This gives the position info for all the potential synapses for every column.
    self.inputGridWidth = T.scalar(dtype='int32')
    #self.inputGridHeight = T.scalar(dtype='int32')
    self.inputInd = T.matrix(dtype='int32')
    self.potSyn_XYInd = (self.inputInd / self.inputGridWidth, self.inputInd % self.inputGridWidth)
    # NOTE(review): check_notpadding is built but not passed to the compiled
    # function below, which outputs potSyn_XYInd directly.
    self.check_notpadding = T.switch(T.gt(self.inputInd, 0), self.potSyn_XYInd, -1)
    self.convert_indicesToXY = function([self.inputGridWidth,
                                         #self.inputGridHeight,
                                         self.inputInd],
                                        self.potSyn_XYInd,
                                        allow_input_downcast=True)
def encoder(image, centroids, W, M, fo=None, chunk_size=999, batch_size=333,
            Y=None):
    '''
    Encode images as pooled, rectified distances to a set of centroids.

    Every psize x psize patch of every image is gathered (conv2d against
    reversed identity filters), normalised per patch, whitened with
    ``(M, W)`` and compared with each centroid by Euclidean distance.  The
    activation of a patch for a centroid is ``max(0, mean_distance -
    distance)``; activations are then summed over the four quadrants of the
    patch grid.

    input:
        image     : N x D  (N, n_channels*HSize*WSize), n_channels is 3
        centroids : K x d  (K, d=n_channels*psize*psize)
        W         : d x d  whitening matrix
        M         : 1 x d  mean removed before whitening
        fo        : optional object with ``create_dataset``; when given, the
                    features are written to its "X" dataset (and ``Y`` to
                    "y") instead of being returned
        chunk_size: rows held in the shared variable at once; must be a
                    multiple of ``batch_size``
        batch_size: rows processed per call of the compiled function
        Y         : optional labels stored as dataset "y" when ``fo`` is set
    output:
        features: N x 4*K array when ``fo`` is None, otherwise None

    Raises AssertionError when ``chunk_size`` is not a multiple of
    ``batch_size`` or when the image / centroid widths are not
    ``n_channels`` times a perfect square.
    '''
    N = image.shape[0]
    # Number of chunks uploaded to the shared variable, one after another.
    n_chunks = int(np.ceil(1.0 * N / chunk_size))
    # Batches per chunk.  Floor division keeps this an int on Python 3 too,
    # so it can be fed to range() below.
    n_batches = chunk_size // batch_size
    assert (n_batches * batch_size == chunk_size)

    n_channels = 3
    imSize = int(np.sqrt(image.shape[1] // n_channels))
    # Use n_channels rather than a second hard-coded 3 so both checks agree.
    assert (np.square(imSize) * n_channels == image.shape[1])

    num_centroids = centroids.shape[0]
    psize = int(np.sqrt(centroids.shape[1] // n_channels))
    assert (np.square(psize) * n_channels == centroids.shape[1])

    # Size of the grid of patch positions inside one image.
    h = imSize - psize + 1
    w = imSize - psize + 1

    if fo is not None:
        X_dataset = fo.create_dataset("X", (N, 4 * num_centroids),
                                      dtype=config.floatX)
        if Y is not None:
            fo.create_dataset("y", data=Y)

    # The first chunk initialises the shared variable; later chunks are
    # swapped in with set_value() inside the main loop.
    img = _load_chunk(image, 0, chunk_size)
    X = shared(img.reshape(-1, n_channels, imSize, imSize), borrow=True)
    C = shared(np.float32(centroids), borrow=True)
    W = shared(np.float32(W), borrow=True)
    M = shared(np.float32(M), borrow=True, broadcastable=(True, False))

    cc = T.square(C).sum(axis=1, keepdims=True).T  # 1 x K

    im = T.tensor4(dtype=config.floatX)
    # Reversed identity: one filter per pixel position of a patch.
    eyef = T.eye(psize * psize, psize * psize, dtype=config.floatX)[::-1]
    filts = T.reshape(eyef, (psize * psize, psize, psize))
    filts = T.shape_padleft(filts).dimshuffle((1, 0, 2, 3))

    res = T.zeros((n_channels, batch_size, psize * psize, h, w),
                  dtype=config.floatX)
    for i in range(n_channels):
        cur_slice = T.shape_padleft(im[:, i, :, :]).dimshuffle((1, 0, 2, 3))
        res = T.set_subtensor(res[i], conv.conv2d(cur_slice, filts))

    # res ~ (channel, batch, conv, hi, wi) -> (batch, hi, wi, channel, conv)
    #     -> (batch*hi*wi, channel*psize*psize)
    res = res.dimshuffle((1, 3, 4, 0, 2)).\
        reshape((batch_size * h * w, n_channels * psize * psize))

    # Normalize the brightness and contrast separately for each patch.
    epsilon = 10
    mean_ = T.cast(res.mean(axis=1, keepdims=True), config.floatX)
    dof = n_channels * psize * psize
    # Rescale the variance by dof / (dof - 1).
    var_ = T.cast(res.var(axis=1, keepdims=True),
                  config.floatX) * dof / (dof - 1)
    res = (res - mean_) / T.sqrt(var_ + epsilon)

    # Whitening
    res = T.dot(res - M, W)  # batch*h*w x n_channels*psize*psize

    # Squared distance via |c|^2 - 2 c.x + |x|^2.
    cx = T.dot(res, C.T)  # batch*h*w x K
    res = T.sqr(res).sum(axis=1, keepdims=True)  # batch*h*w x 1
    distance = cc - 2 * cx + res  # batch*h*w x K
    distance *= (distance > 0)  # clamp tiny negatives from rounding
    distance = T.sqrt(distance)

    batch = distance.mean(axis=1, keepdims=True) - distance
    batch = batch.reshape((batch_size, h, w, num_centroids)).\
        dimshuffle(0, 3, 1, 2)  # batch x K x h x w
    batch *= (batch > 0)  # ReLU

    # Split the h x w grid into 2 x 2 quadrants; an odd grid is zero-padded
    # by one row and one column first.
    if h % 2 == 0:
        half = h // 2
        padded = batch
    else:
        half = (h + 1) // 2
        padded = T.zeros((batch_size, num_centroids, h + 1, w + 1))
        padded = T.set_subtensor(padded[:, :, :h, :w], batch)

    pool_sum = TSN.images2neibs(padded, (half, half))  # batch*K*4 x h*w/4
    pool_out = pool_sum.sum(axis=-1).\
        reshape((batch_size, num_centroids, 2, 2)).dimshuffle(0, 3, 2, 1).\
        reshape((batch_size, 4 * num_centroids))  # batch x 4*K

    index = T.iscalar()
    encode = function(
        inputs=[index],
        outputs=pool_out,
        givens={im: X[(index * batch_size):((index + 1) * batch_size)]})

    # Main loop
    t0 = time.time()
    features = []
    for k in range(n_chunks):
        start = chunk_size * k
        stop = chunk_size * (k + 1)
        if k > 0:
            img = _load_chunk(image, start, chunk_size)
            X.set_value(img.reshape(-1, n_channels, imSize, imSize),
                        borrow=True)

        features_chunk = np.vstack([encode(i) for i in range(n_batches)])

        if fo is None:
            features.append(features_chunk)
        else:
            # dump to file; the last chunk may contain zero-padding rows
            # that must not be written.
            if k == n_chunks - 1 and N != n_chunks * chunk_size:
                X_dataset[start:N] = features_chunk[:np.mod(N, chunk_size)]
            else:
                X_dataset[start:stop] = features_chunk

        # Single parenthesised argument: prints the same on Python 2 and 3.
        print('Encoder: chunk %d/%d' % (k + 1, n_chunks))

    t1 = time.time()
    print('Elapsed %d seconds' % (t1 - t0))

    if fo is None:
        # Drop the rows produced by the zero-padding of the last chunk.
        return np.vstack(features)[:N]


def _load_chunk(image, start, chunk_size):
    '''Return rows start:start+chunk_size of image as float32, zero-padded
    at the end so the result always has chunk_size rows.'''
    img = np.float32(image[start:start + chunk_size])
    if img.shape[0] < chunk_size:
        pad = np.zeros((chunk_size - img.shape[0], img.shape[1]),
                       dtype=img.dtype)
        img = np.vstack([img, pad])
    return img
from theano import tensor as T
from theano.sandbox.neighbours import images2neibs

# Smallest graph that differentiates through images2neibs: cut a float32
# 4D tensor into 2x2 neighbourhoods, project them with a weight matrix and
# ask for the gradient of the summed projection w.r.t. the weights.
X = T.TensorType(dtype='float32', broadcastable=(False,) * 4)()
Y = images2neibs(X, (2, 2))
W = T.matrix()
Z = T.dot(Y, W)
cost = T.sum(Z)
# The gradient expression is built but its result is not kept.
T.grad(cost, W)
def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2), k=4, left=[], right=[]):
    """
    Build a convolution -> folding -> k-max pooling -> tanh layer.

    :type rng: numpy.random.RandomState
    :param rng: random number generator used to draw the initial weights

    :type input: theano.tensor.dtensor4
    :param input: symbolic image tensor, of shape image_shape

    :type filter_shape: tuple or list of length 4
    :param filter_shape: (number of filters, num input feature maps,
                          filter height, filter width)

    :type image_shape: tuple or list of length 4
    :param image_shape: (batch size, num input feature maps,
                         image height, image width)

    :type poolsize: tuple or list of length 2
    :param poolsize: only enters the fan-out estimate used for the
                     weight initialisation bound

    :param k: number of largest values kept in every row
    :param left: left[i] is the number of leading columns dropped from
                 sample i before pooling
    :param right: right[i] is the number of trailing columns dropped from
                  sample i before pooling

    ``left`` and ``right`` are indexed for every sample index below
    ``image_shape[0]``, so the empty defaults only work when callers
    supply real values.
    """
    assert image_shape[1] == filter_shape[1]
    self.input = input

    # Initialisation bound sqrt(6 / (fan_in + fan_out)).
    # fan_in:  input feature maps * filter height * filter width
    # fan_out: output feature maps * filter height * filter width / pool size
    n_in = numpy.prod(filter_shape[1:])
    n_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
             numpy.prod(poolsize))
    bound = numpy.sqrt(6. / (n_in + n_out))
    initial_W = rng.uniform(low=-bound, high=bound, size=filter_shape)
    self.W = theano.shared(
        numpy.asarray(initial_W, dtype=theano.config.floatX),
        borrow=True)

    # One bias per output feature map.
    self.b = theano.shared(
        value=numpy.zeros((filter_shape[0],), dtype=theano.config.floatX),
        borrow=True)

    # Convolve the input feature maps with the filters ('full' border mode).
    conv_out = conv.conv2d(input=input, filters=self.W,
                           filter_shape=filter_shape,
                           image_shape=image_shape,
                           border_mode='full')

    # Folding: view the result as a stack of rows and add each
    # even-indexed row to the row that follows it.
    flat_shape = T.cast(
        T.join(0,
               T.as_tensor([T.prod(conv_out.shape[:-1])]),
               T.as_tensor([conv_out.shape[3]])),
        'int64')
    rows = T.reshape(conv_out, flat_shape, ndim=2)
    row_count = flat_shape[0]
    folded_rows = rows[0:row_count:2] + rows[1:row_count:2]
    folded_shape = T.cast(
        T.join(0,
               conv_out.shape[:-2],
               T.as_tensor([conv_out.shape[2]/2]),
               T.as_tensor([conv_out.shape[3]])),
        'int64')
    fold_out = T.reshape(folded_rows, folded_shape, ndim=4)

    # k-max pooling, one sample at a time because every sample has its own
    # left/right trimming.
    pooled_vectors = []
    for sample in range(image_shape[0]):  # image_shape[0] is the batch size
        sample_rows = TSN.images2neibs(
            ten4=fold_out[sample:(sample+1)],
            neib_shape=(1, fold_out.shape[3]),
            mode='ignore_borders')
        trimmed = sample_rows[:, left[sample]:(sample_rows.shape[1]-right[sample])]
        # Column indices of the k largest entries per row, put back into
        # ascending column order so the original ordering is kept.
        ranking = T.argsort(trimmed, axis=1)
        top_columns = T.sort(ranking[:, -k:], axis=1)
        row_index = T.repeat(T.arange(trimmed.shape[0]), k)
        col_index = top_columns.flatten()
        # Advanced indexing yields a flat vector of the selected values.
        pooled_vectors.append(trimmed[row_index, col_index])

    stacked = T.concatenate(pooled_vectors, axis=0)
    pooled_shape = T.cast(
        T.join(0,
               fold_out.shape[:-2],
               T.as_tensor([fold_out.shape[2]]),
               T.as_tensor([k])),
        'int64')
    pooled_out = T.reshape(stacked, pooled_shape, ndim=4)

    # The 1D bias is reshaped to (1, n_filters, 1, 1) so that it is
    # broadcast across mini-batches and feature map width & height.
    self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

    # store parameters of this layer
    self.params = [self.W, self.b]
def __init__(self, width, height, potentialInhibWidth, potentialInhibHeight, desiredLocalActivity, minOverlap, centerInhib=1):
    """
    Store the inhibition parameters, compile the Theano helper functions
    and precompute the numpy index matrices they are called with.

    width, height -- grid size; width*height is the number of rows of the
        index matrices built at the end of this method.
    potentialInhibWidth, potentialInhibHeight -- size of the neighbourhood
        kernel; their product is stored as areaKernel.
    desiredLocalActivity -- stored, and used to fill minOverlapIndex.
    minOverlap -- stored on the instance.
    centerInhib -- stored on the instance.

    Besides building graphs this method calls self.getColInhibInputs and
    self.calculateConvolePattern and prints both results.
    NOTE(review): the print statements use Python 2 syntax.
    """
    # Temporal Parameters
    ###########################################
    # Specifies if the potential synapses are centered
    # over the columns
    self.centerInhib = centerInhib
    self.width = width
    self.height = height
    self.potentialWidth = potentialInhibWidth
    self.potentialHeight = potentialInhibHeight
    self.areaKernel = self.potentialWidth * self.potentialHeight
    self.desiredLocalActivity = desiredLocalActivity
    self.minOverlap = minOverlap
    # Store how much padding is added to the input grid
    self.topPos_y = 0
    self.bottomPos_y = 0
    self.leftPos_x = 0
    self.rightPos_x = 0

    # Create theano variables and functions
    ############################################

    # Create the theano function for calculating
    # the colInConvole matrix. This takes a vector
    # storing an offset number and adds this to the input
    # matrix if the element in the input matrix is greater than
    # zero.
    self.in_colPatMat = T.matrix(dtype='int32')
    self.in_colAddVect = T.vector(dtype='int32')
    self.in_colNegVect = T.vector(dtype='int32')
    self.col_num3 = T.matrix(dtype='int32')
    self.check_gtZero2 = T.switch(T.gt(self.in_colPatMat, 0),
                                  (self.in_colPatMat +
                                   self.in_colAddVect[self.col_num3] -
                                   self.in_colNegVect[self.col_num3]+1),
                                  0)
    self.add_toConvolePat = function([self.in_colPatMat,
                                      self.in_colAddVect,
                                      self.in_colNegVect,
                                      self.col_num3],
                                     self.check_gtZero2,
                                     allow_input_downcast=True)

    # Create the theano function for calculating
    # the addition of a small tie breaker value to each overlap value.
    self.o_grid = T.matrix(dtype='float32')
    self.tie_grid = T.matrix(dtype='float32')
    self.add_vals = T.add(self.o_grid, self.tie_grid)
    self.add_tieBreaker = function([self.o_grid, self.tie_grid],
                                   self.add_vals,
                                   on_unused_input='warn',
                                   allow_input_downcast=True)

    # Create the theano function for calculating
    # the inputs to a column from an input grid.
    self.kernalSize = (self.potentialHeight, self.potentialWidth)
    # poolstep is how far to move the kernel in each direction.
    self.poolstep = (1, 1)
    # Create the theano function for calculating the overlaps of
    # the potential columns that any column can inhibit.
    self.neib_shape = T.as_tensor_variable(self.kernalSize)
    self.neib_step = T.as_tensor_variable(self.poolstep)
    self.pool_inp = T.tensor4('pool_input', dtype='float32')
    self.pool_convole = images2neibs(self.pool_inp, self.neib_shape, self.neib_step, mode='valid')
    self.pool_inputs = function([self.pool_inp],
                                self.pool_convole,
                                on_unused_input='warn',
                                allow_input_downcast=True)

    # Create the theano function for calculating
    # the sorted vector of overlaps for each column's inhib overlaps.
    # NOTE(review): the attribute is called arg_sort but it is built with
    # `sort`; confirm which one the callers expect.
    self.o_mat = tensor.dmatrix()
    #self.so_mat = tensor.dmatrix()
    self.axis = tensor.scalar()
    self.arg_sort = sort(self.o_mat, self.axis, "quicksort")
    self.sort_vect = function([self.o_mat, self.axis], self.arg_sort)

    # Create the theano function for calculating
    # the minOverlap from the sorted vector of overlaps for each column.
    # This function takes a vector of indices indicating where the
    # minLocalActivity resides for each row in the matrix.
    # Note: the sorted overlap matrix goes from low to highest so use neg index.
    self.min_OIndex = T.vector(dtype='int32')
    self.s_ColOMat = T.matrix(dtype='float32')
    self.row_numVect2 = T.vector(dtype='int32')
    self.get_indPosVal = self.s_ColOMat[self.row_numVect2, -self.min_OIndex]
    self.get_minLocAct = function([self.min_OIndex,
                                   self.s_ColOMat,
                                   self.row_numVect2],
                                  self.get_indPosVal,
                                  allow_input_downcast=True
                                  )

    # Create the theano function for calculating
    # if a column should be active or not based on whether it
    # has an overlap greater than or equal to the minLocalActivity.
    self.minLocalActivity = T.matrix(dtype='float32')
    self.colOMat = T.matrix(dtype='float32')
    self.check_gt_zero = T.switch(T.gt(self.colOMat, 0), 1, 0)
    self.check_gteq_minLocAct = T.switch(T.ge(self.colOMat, self.minLocalActivity), self.check_gt_zero, 0)
    #self.indexActCol = tensor.eq(self.check_gteq_minLocAct, 1).nonzero()
    self.get_activeCol = function([self.colOMat,
                                   self.minLocalActivity],
                                  self.check_gteq_minLocAct,
                                  on_unused_input='warn',
                                  allow_input_downcast=True
                                  )

    # Create the theano function for calculating
    # a matrix of the columns which should stay active because they
    # won the inhibition convolution for all columns.
    # if a column is inhibited then set that location to one only if
    # that row does not represent that inhibited column.
    self.col_pat = T.matrix(dtype='int32')
    self.act_cols = T.matrix(dtype='float32')
    self.col_num2 = T.matrix(dtype='int32')
    self.row_numMat4 = T.matrix(dtype='int32')
    self.cur_inhib_cols4 = T.vector(dtype='int32')

    self.test_meInhib = T.switch(T.eq(self.cur_inhib_cols4[self.row_numMat4], 1), 0, 1)
    self.set_winners = self.act_cols[self.col_pat-1, self.col_num2]
    self.check_colNotInhib = T.switch(T.lt(self.cur_inhib_cols4[self.col_pat-1], 1), self.set_winners, self.test_meInhib)
    # col_pat entries are shifted by one before use, so values below 1
    # fail the >= 0 test and produce 0.
    self.check_colNotPad = T.switch(T.ge(self.col_pat-1, 0), self.check_colNotInhib, 0)
    self.get_activeColMat = function([self.act_cols,
                                      self.col_pat,
                                      self.col_num2,
                                      self.row_numMat4,
                                      self.cur_inhib_cols4],
                                     self.check_colNotPad,
                                     on_unused_input='warn',
                                     allow_input_downcast=True
                                     )

    # Create the theano function for calculating
    # the rows that have more than or equal to
    # the input non_padSum. If this is true then set
    # in the output vector the col this row represents as active.
    # This function calculates if a column beat all the other non inhibited
    # columns in the convole overlap groups.
    self.col_winConPat = T.matrix(dtype='float32')
    self.non_padSum = T.vector(dtype='float32')
    self.w_cols = self.col_winConPat.sum(axis=1)
    self.test_lcol = T.switch(T.ge(self.w_cols, self.non_padSum), 1, 0)
    self.test_gtZero = T.switch(T.gt(self.non_padSum, 0), self.test_lcol, 0)
    self.get_activeColVect = function([self.col_winConPat,
                                       self.non_padSum],
                                      self.test_gtZero,
                                      allow_input_downcast=True)

    # Create the theano function for calculating
    # the sum of the rows of the input matrix.
    self.in_mat1 = T.matrix(dtype='float32')
    self.out_summat2 = self.in_mat1.sum(axis=1)
    self.get_sumRowMat = function([self.in_mat1],
                                  self.out_summat2,
                                  allow_input_downcast=True)

    # Create the theano function for calculating
    # the sum of the elements of the input vector.
    self.in_vect2 = T.vector(dtype='float32')
    self.out_sumvect2 = self.in_vect2.sum(axis=0)
    self.get_sumRowVec = function([self.in_vect2],
                                  self.out_sumvect2,
                                  allow_input_downcast=True)

    # Create the theano function for calculating
    # if the input matrix is larger than 0 (element wise).
    # NOTE: despite its name, lt_zer0 holds a greater-than-zero test.
    self.in_mat2 = T.matrix(dtype='float32')
    self.lt_zer0 = T.switch(T.gt(self.in_mat2, 0), 1, 0)
    self.get_gtZeroMat = function([self.in_mat2],
                                  self.lt_zer0,
                                  allow_input_downcast=True)

    # Create the theano function for calculating
    # if the input vector is larger than 0 (element wise).
    self.in_vect1 = T.vector(dtype='float32')
    self.gt_zeroVect = T.switch(T.gt(self.in_vect1, 0), 1, 0)
    self.get_gtZeroVect = function([self.in_vect1],
                                   self.gt_zeroVect,
                                   allow_input_downcast=True)

    # Create the theano function for calculating
    # if an input vector is larger than or equal to a scalar (element wise).
    self.in_vect7 = T.vector(dtype='float32')
    self.in_scalar = T.scalar(dtype='float32')
    self.ge_scalar = T.switch(T.ge(self.in_vect7, self.in_scalar), 1, 0)
    self.get_vectGeScalar = function([self.in_vect7, self.in_scalar],
                                     self.ge_scalar,
                                     allow_input_downcast=True)

    # Create the theano function for calculating
    # which columns in a column's convole inhib list are active.
    # A Matrix is returned where each row stores a list of
    # ones or zeros indicating which columns in a column's convole
    # group are active.
    self.act_cols4 = T.vector(dtype='float32')
    self.col_convolePatInd = T.matrix(dtype='int32')
    self.check_rCols = T.switch(T.gt(self.col_convolePatInd, 0), self.act_cols4[self.col_convolePatInd - 1], 0)
    self.get_actColsInCon = function([self.col_convolePatInd, self.act_cols4],
                                     self.check_rCols,
                                     allow_input_downcast=True)

    # Create the theano function for calculating
    # whether the active column in the column's convole list contains
    # the desired local activity number of active columns in its
    # convole list. This function returns a matrix where each
    # row stores a list of ones or zeros indicating which
    # columns in a column's convole group contain the desired number
    # of active columns.
    self.desiredLocalActivity2 = T.scalar(dtype='float32')
    self.numActColsInConVect2 = T.vector(dtype='float32')
    self.act_colsConMat = T.matrix(dtype='float32')
    self.col_convolePatInd2 = T.matrix(dtype='int32')
    self.get_colsConPat = T.switch(T.ge(self.numActColsInConVect2[self.col_convolePatInd2-1], self.desiredLocalActivity2), 1, 0)
    self.check_colsConPat = T.switch(T.gt(self.act_colsConMat, 0), self.get_colsConPat, 0)
    self.check_actColsCon = function([self.desiredLocalActivity2,
                                      self.col_convolePatInd2,
                                      self.numActColsInConVect2,
                                      self.act_colsConMat],
                                     self.check_colsConPat,
                                     allow_input_downcast=True)

    # Create the theano function for calculating
    # For the active columns if their convole list contains
    # the desired local activity number of active columns then
    # inhibit the remaining inactive cols in the convole list.
    # This function returns a matrix where each
    # row stores a list of ones or zeros indicating which
    # columns in a column's convole group should be inhibited.
    self.desiredLocalActivity3 = T.scalar(dtype='float32')
    self.numActColsInConVect4 = T.vector(dtype='float32')
    self.act_cols7 = T.vector(dtype='float32')
    self.row_numMat5 = T.matrix(dtype='int32')
    self.col_inConvoleMat2 = T.matrix(dtype='int32')
    self.check_numActCols = T.switch(T.ge(self.numActColsInConVect4[self.col_inConvoleMat2-1], self.desiredLocalActivity3), 1, 0)
    self.check_colIndAct = T.switch(T.gt(self.act_cols7[self.col_inConvoleMat2-1], 0), self.check_numActCols, 0)
    self.check_colsRowInAct = T.switch(T.gt(self.act_cols7[self.row_numMat5], 0), 0, self.check_colIndAct)
    self.check_gtZero = T.switch(T.gt(self.col_inConvoleMat2, 0), self.check_colsRowInAct, 0)
    self.inhibit_actColsCon = function([self.desiredLocalActivity3,
                                        self.col_inConvoleMat2,
                                        self.numActColsInConVect4,
                                        self.act_cols7,
                                        self.row_numMat5],
                                       self.check_gtZero,
                                       allow_input_downcast=True)

    # Create the theano function for calculating
    # the sum of the rows for the non active columns.
    # An input matrix's elements represent the columns' convole
    # lists where each column in the list is one if that column's
    # convole list also contains the desired local activity number of
    # active columns. The other input vector is a list of the active columns.
    self.act_cols5 = T.vector(dtype='float32')
    self.colsConMaxCols = T.matrix(dtype='float32')
    self.get_rowSum = self.colsConMaxCols.sum(axis=1)
    self.check_colAct = T.switch(T.gt(self.act_cols5, 0), 0, self.get_rowSum)
    self.sum_nonActColsRows = function([self.colsConMaxCols,
                                        self.act_cols5],
                                       self.check_colAct,
                                       allow_input_downcast=True)

    # Create the theano function for calculating
    # the input columns vector where the active columns
    # in the input vector have been set to zero.
    self.numActColsInConVect3 = T.vector(dtype='float32')
    self.act_cols6 = T.vector(dtype='float32')
    self.zero_actCol = T.switch(T.gt(self.act_cols6, 0), 0, self.numActColsInConVect3)
    self.remove_actCols = function([self.numActColsInConVect3,
                                    self.act_cols6],
                                   self.zero_actCol,
                                   allow_input_downcast=True)

    # Create the theano function for calculating
    # the updated inhibition matrix for the columns.
    # The output is the colInConvoleList where each
    # position represents an inhibited or not col.
    # NOTE: col_inConvoleMat2 and check_gtZero are rebound here; the
    # inhibit_actColsCon function above was already built from the
    # previous variables, so the order of these statements matters.
    #self.inh_colVect = T.vector(dtype='float32')
    self.act_cols3 = T.vector(dtype='float32')
    self.col_inConvoleMat2 = T.matrix(dtype='int32')
    self.row_numMat2 = T.matrix(dtype='int32')
    self.get_upInhibCols = T.switch(T.gt(self.act_cols3[self.row_numMat2], 0), 0, self.act_cols3[self.col_inConvoleMat2 - 1])
    self.check_gtZero = T.switch(T.gt(self.col_inConvoleMat2, 0), self.get_upInhibCols, 0)
    self.check_vectValue = function([self.col_inConvoleMat2,
                                     self.act_cols3,
                                     self.row_numMat2],
                                    self.check_gtZero,
                                    allow_input_downcast=True)

    # Create the theano function for calculating
    # if a column should be inhibited because the column
    # has an overlap value below one.
    self.col_overlapVect = T.vector(dtype='float32')
    self.col_inhib = T.vector(dtype='int32')
    self.check_ltOne = T.switch(T.lt(self.col_overlapVect, 1), 1, self.col_inhib)
    self.inhibit_zeroOverlap = function([self.col_overlapVect,
                                         self.col_inhib],
                                        self.check_ltOne,
                                        allow_input_downcast=True)

    # Create the theano function for calculating
    # if a column should not be active because the column
    # has an overlap value below one.
    # NOTE: col_overlapVect and check_ltOne are rebound here, after
    # inhibit_zeroOverlap has been built from the previous ones.
    self.col_overlapVect = T.vector(dtype='float32')
    self.col_active = T.vector(dtype='int32')
    self.check_ltOne = T.switch(T.lt(self.col_overlapVect, 1), 0, self.col_active)
    self.disable_zeroOverlap = function([self.col_overlapVect,
                                         self.col_active],
                                        self.check_ltOne,
                                        allow_input_downcast=True)

    # Create the theano function for calculating
    # the first input vector minus the second.
    self.in_vect3 = T.vector(dtype='int32')
    self.in_vect4 = T.vector(dtype='int32')
    self.out_minusvect = self.in_vect3 - self.in_vect4
    self.minus_vect = function([self.in_vect3,
                                self.in_vect4],
                               self.out_minusvect,
                               allow_input_downcast=True)

    # Create the theano function for calculating
    # the first input vector plus the second.
    self.in_vect5 = T.vector(dtype='int32')
    self.in_vect6 = T.vector(dtype='int32')
    self.out_sumvect = self.in_vect5 + self.in_vect6
    self.sum_vect = function([self.in_vect5,
                              self.in_vect6],
                             self.out_sumvect,
                             allow_input_downcast=True)

    # Create the theano function for calculating
    # if a column in the matrix is inhibited.
    # Any inhibited columns should be set as zero.
    # Any columns not inhibited should be set to the input matrix value.
    self.act_cols2 = T.matrix(dtype='float32')
    self.col_pat3 = T.matrix(dtype='int32')
    self.cur_inhib_cols2 = T.vector(dtype='int32')
    self.set_winToZero = T.switch(T.eq(self.cur_inhib_cols2[self.col_pat3-1], 1), 0, self.act_cols2)
    self.check_lZeroCol = T.switch(T.ge(self.col_pat3-1, 0), self.set_winToZero, 0)
    self.check_inhibCols = function([self.act_cols2,
                                     self.col_pat3,
                                     self.cur_inhib_cols2],
                                    self.check_lZeroCol,
                                    allow_input_downcast=True
                                    )

    # Create the theano function for calculating
    # if a column in the matrix is inhibited.
    # Any inhibited columns should be set as zero.
    # Any columns not inhibited should be set to the input pattern matrix value.
    self.col_pat4 = T.matrix(dtype='int32')
    self.cur_inhib_cols3 = T.vector(dtype='int32')
    self.set_patToZero = T.switch(T.eq(self.cur_inhib_cols3[self.col_pat4-1], 1), 0, self.col_pat4)
    self.check_lZeroCol2 = T.switch(T.ge(self.col_pat4-1, 0), self.set_patToZero, 0)
    self.check_inhibColsPat = function([self.col_pat4,
                                        self.cur_inhib_cols3],
                                       self.check_lZeroCol2,
                                       allow_input_downcast=True
                                       )

    #### END of Theano functions and variables definitions
    #################################################################
    # The following variables are used for indices when looking up values
    # in matrices from within a theano function.
    # Create a matrix that just holds the column number for each element
    self.col_num = np.array([[i for i in range(self.potentialWidth*self.potentialHeight)]
                             for j in range(self.width*self.height)])
    # Create a matrix that just holds the row number for each element
    self.row_numMat = np.array([[j for i in range(self.potentialWidth*self.potentialHeight)]
                                for j in range(self.width*self.height)])
    # Create just a vector storing the row numbers for each column.
    # This is just an incrementing vector from zero to the number of columns - 1
    self.row_numVect = np.array([i for i in range(self.width*self.height)])
    # Create just a vector storing if a column is inhibited or not
    self.inhibCols = np.array([0 for i in range(self.width*self.height)])
    # Create a vector of minOverlap indices. This stores the position
    # for each col where the minOverlap resides, in the sorted Convole overlap mat
    self.minOverlapIndex = np.array([self.desiredLocalActivity for i in range(self.width*self.height)])
    # Now also calculate a convole grid so the column's position
    # in the resulting col inhib overlap matrix can be tracked.
    # Entries count up from 1, row by row.
    self.incrementingMat = np.array([[1+i+self.width*j for i in range(self.width)] for j in range(self.height)])
    #print "self.incrementingMat = \n%s" % self.incrementingMat
    #print "potential height, width = %s, %s " %(self.potentialHeight, self.potentialWidth)
    self.colConvolePatternIndex = self.getColInhibInputs(self.incrementingMat)
    print "colConvole = \n%s" % self.colConvolePatternIndex
    #print "colConvole height, width = %s, %s " % (len(self.colConvolePatternIndex),len(self.colConvolePatternIndex[0]))

    # Calculate a matrix storing the location of the numbers from
    # colConvolePatternIndex.
    self.colInConvoleList = self.calculateConvolePattern(self.colConvolePatternIndex)
    print "colInConvoleList = \n%s" % self.colInConvoleList

    # Store a vector where each element stores for a column how many times
    # that column appears in other columns' convole lists: mark the entries
    # greater than zero with one, then sum every row.
    self.nonPaddingSumVect = self.get_gtZeroMat(self.colInConvoleList)
    self.nonPaddingSumVect = self.get_sumRowMat(self.nonPaddingSumVect)
# NOTE(review): the statements down to `return R` are the tail of a function
# whose `def` line is not part of this chunk; they are kept unchanged.
    # Temporarily reshape the three arrays in place to 4D, call fc_fun,
    # then restore the caller's original shapes before returning.
    Ishape = intercept.shape
    intercept.shape = (1, Ishape[0], 1, 1)
    Ashape = A.shape
    A.shape = (Ashape[0], 1, Ashape[1], Ashape[2])
    Bshape = filter.shape
    filter.shape = (Bshape[0], 1, Bshape[1], Bshape[2])
    R = fc_fun(A.astype(floatX1), rot180_T4(filter).astype(floatX1), intercept.astype(floatX1))
    A.shape = Ashape
    filter.shape = Bshape
    intercept.shape = Ishape
    return R


# Compiled pooling function: cut the input into (pdim, pdim) neighbourhoods
# and take the mean of each neighbourhood.
pdim = T.scalar('pool dim', dtype = floatX1)
pool_inp = T.tensor4('pool input', dtype = floatX1)
pool_sum = TSN.images2neibs(pool_inp, (pdim, pdim))
pool_out = pool_sum.mean(axis=-1)
pool_fun = theano.function([pool_inp, pdim], pool_out, name = 'pool_fun')


def average_pool_T4(A, pool_dim):
    """
    Compute average pooling for a 4-dimensional tensor - this is equivalent
    to pooling over all the matrices stored in the 4-dim tensor.

    A is the 4-D array to pool; pool_dim is the side of the square pooling
    window.  Returns the pooled values reshaped so that the last two axes
    are divided by pool_dim.
    """
    # Warning: pool_fun returns a 1-D vector, we need to reshape it into a 4-D
    # tensor
    temp = pool_fun(A, pool_dim)
    # NOTE(review): `/` gives floats on Python 3, which cannot be assigned
    # to .shape -- confirm the target interpreter.
    temp.shape = (A.shape[0], A.shape[1], A.shape[2]/pool_dim, A.shape[3]/pool_dim)
    return temp
# NOTE(review): this `return` is the last statement of a function whose
# `def` line is not part of this chunk; it is kept unchanged.
    return np.rot90(A, 2)


def average_pool(A, pool_dim):
    """Average-pool a 4-D array with a pool_dim x pool_dim window.

    Each A[i] is passed to convolveTH4 with an all-ones kernel, the result
    is sampled every pool_dim-th position and divided by the window area.
    """
    B = np.ones((1, pool_dim, pool_dim)).astype(floatX)
    # NOTE(review): `/` gives floats on Python 3, which np.zeros rejects as
    # a shape -- confirm the target interpreter.
    R = np.zeros((A.shape[0], A.shape[1], A.shape[2] / pool_dim, A.shape[3] / pool_dim)).astype(floatX)
    for i in range(A.shape[0]):
        temp = convolveTH4(A[i], B)[:, 0, 0::pool_dim, 0::pool_dim]
        R[i] = temp / (pool_dim * pool_dim)
    return R


# Compiled pooling function: cut the input into (pdim, pdim) neighbourhoods
# and take the mean of each neighbourhood.
pdim = T.scalar('pool dim', dtype=floatX)
pool_inp = T.tensor4('pool input', dtype=floatX)
pool_sum = TSN.images2neibs(pool_inp, (pdim, pdim))
pool_out = pool_sum.mean(axis=-1)
pool_fun = theano.function([pool_inp, pdim], pool_out)


def pool_th(A, pool_dim):
    """Average-pool A with pool_fun and reshape the flat result so that the
    last two axes of A are divided by pool_dim."""
    temp = pool_fun(A, pool_dim)
    # NOTE(review): same Python 3 float-shape concern as in average_pool.
    temp.shape = (A.shape[0], A.shape[1], A.shape[2] / pool_dim, A.shape[3] / pool_dim)
    return temp


def convolveTH2(A, B):
    """Reshape the 2-D arrays A and B in place to 4-D and pass them to
    conv_fun.

    NOTE(review): nothing is returned and the shapes are not restored in
    the visible code -- the function may be cut off here; confirm.
    """
    A.shape = (1, 1, A.shape[0], A.shape[1])
    B.shape = (1, 1, B.shape[0], B.shape[1])
    C = conv_fun(A, B)
def Fold(conv_out, orig, ds=(2,1), ignore_border=False):
    '''Fold into two. (Sum up vertical neighbours)

    conv_out is cut into neighbourhoods of shape ds, every neighbourhood
    is summed, and the sums are reshaped to orig.  ignore_border is
    accepted but not used: the mode is always 'ignore_borders'.
    '''
    neighbourhood_shape = T.as_tensor_variable(ds)
    neighbourhoods = images2neibs(conv_out, neighbourhood_shape,
                                  mode='ignore_borders')
    # One row per neighbourhood, so summing the last axis adds up the
    # elements that are folded together.
    folded = neighbourhoods.sum(axis=-1)
    return T.reshape(folded, orig)
def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2), k=[], unifiedWidth=30, left=[], right=[], firstLayer=True):
    """
    Build a convolution -> folding -> per-sample k-max pooling -> tanh layer
    whose outputs are centred in a fixed width of ``unifiedWidth`` columns.

    :param rng: numpy.random.RandomState used to draw the initial weights
    :param input: symbolic 4D input tensor, of shape image_shape
    :param filter_shape: (number of filters, num input feature maps,
                          filter height, filter width)
    :param image_shape: (batch size, num input feature maps,
                         image height, image width)
    :param poolsize: only enters the fan-out estimate used for the weight
                     initialisation bound
    :param k: k[i] is the number of largest values kept per row for
              sample i.  It is also used in ``unifiedWidth - k``, so it
              must support arithmetic (array or symbolic vector); the
              empty-list default does not.
    :param unifiedWidth: width of the last output axis
    :param left: left[i] leading columns of sample i are dropped before
                 pooling
    :param right: right[i] trailing columns of sample i are dropped before
                  pooling
    :param firstLayer: when True the pooled values are zero-padded on both
                       sides up to ``unifiedWidth`` and the padding is
                       reset to zero again after the bias is added
    """
    assert image_shape[1] == filter_shape[1]
    self.input = input

    # there are "num input feature maps * filter height * filter width"
    # inputs to each hidden unit
    fan_in = numpy.prod(filter_shape[1:])
    # each unit in the lower layer receives a gradient from:
    # "num output feature maps * filter height * filter width" /
    # pooling size
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
               numpy.prod(poolsize))
    # initialize weights with random weights
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))
    self.W = theano.shared(numpy.asarray(
        rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
        dtype=theano.config.floatX),
        borrow=True)

    # The bias has one row per folded filter row and a single column that
    # is repeated across the unified width.  Floor division keeps the row
    # count an int (numpy.zeros rejects a float shape on Python 3).
    b_values = numpy.zeros((filter_shape[2] // 2, 1), dtype=theano.config.floatX)
    self.b = theano.shared(value=b_values, borrow=True)
    bb = T.repeat(self.b, unifiedWidth, axis=1)

    # convolve input feature maps with filters
    conv_out = conv_WP(inputs=input, filters_W=self.W, filter_shape=filter_shape, image_shape=image_shape)

    # Folding: view the result as a stack of rows and average each
    # even-indexed row with the row that follows it.
    matrix_shape = T.cast(T.join(0,
                                 T.as_tensor([T.prod(conv_out.shape[:-1])]),
                                 T.as_tensor([conv_out.shape[3]])),
                          'int64')
    matrix = T.reshape(conv_out, matrix_shape, ndim=2)
    odd_matrix = matrix[0:matrix_shape[0]:2]
    even_matrix = matrix[1:matrix_shape[0]:2]
    raw_folded_matrix = (odd_matrix + even_matrix) * 0.5

    out_shape = T.cast(T.join(0, conv_out.shape[:-2],
                              T.as_tensor([conv_out.shape[2] // 2]),
                              T.as_tensor([conv_out.shape[3]])),
                       'int64')
    fold_out = T.reshape(raw_folded_matrix, out_shape, ndim=4)

    # k-max pooling, one sample at a time because every sample has its own
    # trimming and its own k.
    padded_matrices = []
    for i in range(image_shape[0]):  # image_shape[0] is the batch size
        neighborsForPooling = TSN.images2neibs(ten4=fold_out[i:(i+1)], neib_shape=(1, fold_out.shape[3]), mode='ignore_borders')
        # only consider the columns between the left and right trimming
        non_zeros = neighborsForPooling[:, left[i]:(neighborsForPooling.shape[1]-right[i])]
        neighborsArgSorted = T.argsort(non_zeros, axis=1)
        kNeighborsArg = neighborsArgSorted[:, -k[i]:]
        # put the selected column indices back into ascending order
        kNeighborsArgSorted = T.sort(kNeighborsArg, axis=1)
        ii = T.repeat(T.arange(non_zeros.shape[0]), k[i])
        jj = kNeighborsArgSorted.flatten()
        pooledkmaxList = non_zeros[ii, jj]  # flat vector of selected values
        new_shape = T.cast(T.join(0,
                                  T.as_tensor([non_zeros.shape[0]]),
                                  T.as_tensor([k[i]])),
                           'int64')
        pooledkmaxMatrix = T.reshape(pooledkmaxList, new_shape, ndim=2)

        if firstLayer:
            # Centre the k[i] columns inside unifiedWidth.  Floor division
            # keeps the pad widths integral so they are valid shapes.
            leftWidth = (unifiedWidth - k[i]) // 2
            rightWidth = unifiedWidth - leftWidth - k[i]
            left_padding = T.zeros((non_zeros.shape[0], leftWidth), dtype=theano.config.floatX)
            right_padding = T.zeros((non_zeros.shape[0], rightWidth), dtype=theano.config.floatX)
            matrix_padded = T.concatenate([left_padding, pooledkmaxMatrix, right_padding], axis=1)
            padded_matrices.append(matrix_padded)
        else:
            padded_matrices.append(pooledkmaxMatrix)

    overall_matrix = T.concatenate(padded_matrices, axis=0)
    new_shape = T.cast(T.join(0, fold_out.shape[:-2],
                              T.as_tensor([fold_out.shape[2]]),
                              T.as_tensor([unifiedWidth])),
                       'int64')
    pooled_out = T.reshape(overall_matrix, new_shape, ndim=4)

    # Adding the bias makes the zero padding non-zero, so the padding
    # columns are reset to zero below before tanh is applied.
    biased_pooled_out = pooled_out + bb.dimshuffle('x', 'x', 0, 1)

    # Pad widths per sample; floor division keeps them usable as slice
    # bounds and shapes on Python 3 as well.
    self.leftPad = (unifiedWidth - k) // 2
    self.rightPad = unifiedWidth - self.leftPad - k
    if firstLayer:
        zero_recover_matrices = []
        for i in range(image_shape[0]):  # image_shape[0] is the batch size
            neighborsForPooling = TSN.images2neibs(ten4=biased_pooled_out[i:(i+1)], neib_shape=(1, biased_pooled_out.shape[3]), mode='ignore_borders')
            left_zeros = T.set_subtensor(neighborsForPooling[:, :self.leftPad[i]], T.zeros((neighborsForPooling.shape[0], self.leftPad[i]), dtype=theano.config.floatX))
            right_zeros = T.set_subtensor(left_zeros[:, (neighborsForPooling.shape[1]-self.rightPad[i]):], T.zeros((neighborsForPooling.shape[0], self.rightPad[i]), dtype=theano.config.floatX))
            zero_recover_matrices.append(right_zeros)
        overall_matrix_new = T.concatenate(zero_recover_matrices, axis=0)
        pooled_out_with_zeros = T.reshape(overall_matrix_new, new_shape, ndim=4)
        self.output = T.tanh(pooled_out_with_zeros)
    else:
        self.output = T.tanh(biased_pooled_out)

    # store parameters of this layer
    self.params = [self.W, self.b]
def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2), k=4):
    """
    Allocate a LeNetConvPoolLayer with shared variable internal parameters.

    The layer convolves ``input`` with a bank of filters, keeps the ``k``
    largest activations of every feature-map row (k-max pooling along the
    width axis, original left-to-right order preserved), adds a per-filter
    bias and applies ``tanh``.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type input: theano.tensor.dtensor4
    :param input: symbolic image tensor, of shape image_shape

    :type filter_shape: tuple or list of length 4
    :param filter_shape: (number of filters, num input feature maps,
                          filter height, filter width)

    :type image_shape: tuple or list of length 4
    :param image_shape: (batch size, num input feature maps,
                         image height, image width)

    :type poolsize: tuple or list of length 2
    :param poolsize: the downsampling (pooling) factor (#rows, #cols).
                     Only used to scale the weight-initialisation bound;
                     the pooled width is given by ``k``.

    :type k: int
    :param k: number of activations kept per feature-map row; the last
              axis of ``self.output`` has this length.

    :raises ValueError: if ``k`` is a static integer smaller than 1.
    """
    assert image_shape[1] == filter_shape[1]

    # A static k < 1 cannot select anything meaningful: with k == 0 the
    # slice ``[:, -0:]`` below keeps *every* column while ``T.repeat``
    # yields no row indices, so the graph would only fail later with an
    # obscure shape error. Symbolic k is left unchecked.
    if isinstance(k, (int, numpy.integer)) and k < 1:
        raise ValueError("k must be a positive integer, got %r" % (k,))

    self.input = input

    # there are "num input feature maps * filter height * filter width"
    # inputs to each hidden unit
    fan_in = numpy.prod(filter_shape[1:])
    # each unit in the lower layer receives a gradient from:
    # "num output feature maps * filter height * filter width" /
    #   pooling size
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
               numpy.prod(poolsize))

    # initialize weights with random weights
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))
    self.W = theano.shared(
        numpy.asarray(
            rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
            dtype=theano.config.floatX),
        borrow=True)

    # the bias is a 1D tensor -- one bias per output feature map
    b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
    self.b = theano.shared(value=b_values, borrow=True)

    # convolve input feature maps with filters
    conv_out = conv.conv2d(input=input, filters=self.W,
                           filter_shape=filter_shape,
                           image_shape=image_shape)

    # images2neibs produces a 2D matrix; with a neighbourhood of
    # (1, width) every matrix row is one full row of one feature map.
    neighborsForPooling = TSN.images2neibs(
        ten4=conv_out,
        neib_shape=(1, conv_out.shape[3]),
        mode='ignore_borders')

    # argsort is ascending, so the last k columns hold the positions of
    # the k largest activations in each row.
    neighborsArgSorted = T.argsort(neighborsForPooling, axis=1)
    kNeighborsArg = neighborsArgSorted[:, -k:]
    # Sorting the positions restores the original left-to-right order of
    # the selected activations.
    kNeighborsArgSorted = T.sort(kNeighborsArg, axis=1)

    # Pair every row index (repeated k times) with its k column indices
    # and gather the selected values as one flat vector.
    ii = T.repeat(T.arange(neighborsForPooling.shape[0]), k)
    jj = kNeighborsArgSorted.flatten()
    pooledkmaxTmp = neighborsForPooling[ii, jj]

    # reshape pooledkmaxTmp back to 4D: the leading two axes and the
    # height of conv_out are kept, the width becomes k.
    new_shape = T.cast(
        T.join(0,
               conv_out.shape[:-2],
               T.as_tensor([conv_out.shape[2]]),
               T.as_tensor([k])),
        'int64')
    pooled_out = T.reshape(pooledkmaxTmp, new_shape, ndim=4)

    # add the bias term. Since the bias is a vector (1D array), we first
    # reshape it to a tensor of shape (1,n_filters,1,1). Each bias will
    # thus be broadcasted across mini-batches and feature map
    # width & height
    self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

    # store parameters of this layer
    self.params = [self.W, self.b]
# Minimal script: build a symbolic graph that goes through images2neibs and
# request the gradient of a scalar cost with respect to W.
from theano import tensor as T
from theano.sandbox.neighbours import images2neibs

# Symbolic 4-D float32 tensor; none of its dimensions is broadcastable.
X = T.TensorType(broadcastable = (False,False,False,False), dtype = 'float32')()
# Rearrange X into 2x2 neighbourhoods (images2neibs defaults for step and mode).
Y = images2neibs(X,(2,2))
# Symbolic matrix multiplied with the neighbourhood matrix.
W = T.matrix()
Z = T.dot(Y,W)
# Reduce to a scalar so it can serve as the cost passed to T.grad.
cost = Z.sum()
# The gradient expression is built but its result is discarded.
# NOTE(review): presumably a smoke test that the gradient can be taken
# through images2neibs -- confirm the intent.
T.grad(cost,W)