import numpy as np

import theano
import theano.tensor as tensor
from theano.tensor.sort import ArgSortOp, argsort
from theano.tests import unittest_tools as utt


def test_argsort():
    # Set up
    rng = np.random.RandomState(seed=utt.fetch_seed())
    m_val = rng.rand(3, 2)
    v_val = rng.rand(4)

    # Example 1
    a = tensor.dmatrix()
    w = argsort(a)
    f = theano.function([a], w)
    gv = f(m_val)
    gt = np.argsort(m_val)
    assert np.allclose(gv, gt)

    # Example 2
    a = tensor.dmatrix()
    axis = tensor.lscalar()
    w = argsort(a, axis)
    f = theano.function([a, axis], w)
    for axis_val in 0, 1:
        gv = f(m_val, axis_val)
        gt = np.argsort(m_val, axis_val)
        assert np.allclose(gv, gt)

    # Example 3
    a = tensor.dvector()
    w2 = argsort(a)
    f = theano.function([a], w2)
    gv = f(v_val)
    gt = np.argsort(v_val)
    assert np.allclose(gv, gt)

    # Example 4
    a = tensor.dmatrix()
    axis = tensor.lscalar()
    l = argsort(a, axis, "mergesort")
    f = theano.function([a, axis], l)
    for axis_val in 0, 1:
        gv = f(m_val, axis_val)
        gt = np.argsort(m_val, axis_val)
        assert np.allclose(gv, gt)

    # Example 5: Op equality depends on the sort kind
    a1 = ArgSortOp("mergesort", [])
    a2 = ArgSortOp("quicksort", [])
    # All of the below should be true
    assert a1 != a2
    assert a1 == ArgSortOp("mergesort", [])
    assert a2 == ArgSortOp("quicksort", [])

    # Example 6: axis=None flattens the input before sorting
    a = tensor.dmatrix()
    w2 = argsort(a, None)
    f = theano.function([a], w2)
    gv = f(m_val)
    gt = np.argsort(m_val, None)
    assert np.allclose(gv, gt)
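# A minimal usage sketch (illustrative, not part of the test suite): a
# compiled argsort graph mirrors np.argsort, returning sort indices per axis.
def demo_argsort():
    x = tensor.dmatrix('x')
    f = theano.function([x], argsort(x, axis=1))
    idx = f(np.array([[3.0, 1.0], [2.0, 5.0]]))
    assert np.array_equal(idx, [[1, 0], [0, 1]])  # row-wise ascending order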
def test_argsort_grad():
    # Test the gradient of argsort
    data = np.random.rand(2, 3).astype(theano.config.floatX)
    utt.verify_grad(lambda x: argsort(x, axis=-1), [data])

    data = np.random.rand(2, 3, 4, 5).astype(theano.config.floatX)
    utt.verify_grad(lambda x: argsort(x, axis=-3), [data])

    data = np.random.rand(2, 3, 3).astype(theano.config.floatX)
    utt.verify_grad(lambda x: argsort(x, axis=2), [data])
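# What verify_grad checks here (a hedged sketch): argsort's output is a
# piecewise-constant integer index array, so ArgSortOp defines its gradient
# with respect to the input as zero, and the numeric gradient agrees.
def demo_argsort_grad_is_zero():
    data = np.random.rand(5).astype(theano.config.floatX)
    utt.verify_grad(lambda x: argsort(x), [data])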
def get_output_for(self, inputs, **kwargs):
    """
    Compute this layer's output given a list of symbolic input variables.

    :param inputs: list of theano.TensorType
        `inputs[self.vertex_incoming_index]` is the symbolic vertex
        variable and `inputs[self.edge_incoming_index]` is the symbolic
        edge variable.
    :return: theano.TensorType
        Symbolic output variable.
    """
    vertex = inputs[self.vertex_incoming_index]
    # shuffle vertex to shape [batch, n, channel]
    vertex = vertex.dimshuffle(0, 2, 1)

    # get each dimension
    vertex_shape = vertex.shape
    batch_size = vertex_shape[0]
    num_vertex = vertex_shape[1]
    num_channel = vertex_shape[2]
    num_dist_metrics = self.edge_shape[1]
    filter_size = self.filter_size
    num_filters = self.num_filters

    # broadcast vertex up to shape [batch, n, n, channel]
    vertex_conv = T.cast(
        T.alloc(0.0, batch_size, num_vertex, num_vertex, num_channel),
        'floatX')
    vertex_conv = vertex_conv + vertex.dimshuffle(0, 'x', 1, 2)
    # reshape vertex_conv to [batch * n, n, channel]
    vertex_conv = T.reshape(
        vertex_conv, (batch_size * num_vertex, num_vertex, num_channel))

    edge = inputs[self.edge_incoming_index]
    # argsort gives shape [batch, d, n, n]; keep the first filter_size
    # indices per vertex, so edge_sorted_indices has shape [batch, d, n, k]
    edge_sorted_indices = argsort(edge, axis=3)
    edge_sorted_indices = edge_sorted_indices[:, :, :, :filter_size]
    # shuffle indices to shape [batch, n, d, k]
    edge_sorted_indices = edge_sorted_indices.dimshuffle(0, 2, 1, 3)
    # reshape indices to shape [batch * n, d * k]
    edge_sorted_indices = T.reshape(
        edge_sorted_indices,
        (batch_size * num_vertex, num_dist_metrics * filter_size))

    # gather a conv tensor of shape [d * k, batch * n, channel]
    conv = vertex_conv[T.arange(batch_size * num_vertex),
                       edge_sorted_indices.T, :]
    # shuffle conv to [batch * n, d * k, channel]
    conv = conv.dimshuffle(1, 0, 2)
    # reshape conv to [batch * n, d * k * channel]
    conv = T.reshape(
        conv,
        (batch_size * num_vertex,
         num_dist_metrics * filter_size * num_channel))

    # dot conv with W:
    # [batch * n, d * k * channel] x [d * k * channel, num_filters]
    # = [batch * n, num_filters]
    activation = T.dot(conv, self.W)
    if self.b is not None:
        activation = activation + self.b.dimshuffle('x', 0)
    # apply the nonlinearity
    activation = self.nonlinearity(activation)
    # reshape activation back to [batch, n, num_filters]
    activation = T.reshape(activation, (batch_size, num_vertex, num_filters))
    # shuffle it to [batch, num_filters, n]
    return activation.dimshuffle(0, 2, 1)
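# Hedged toy check of the gather pattern used above: indexing a
# [batch * n, n, channel] tensor with arange over axis 0 and the transposed
# [d * k, batch * n] index matrix broadcasts to [d * k, batch * n, channel].
def demo_gather_shape():
    import numpy as np
    import theano
    import theano.tensor as T

    vc = T.dtensor3()   # stands in for vertex_conv: [batch * n, n, channel]
    idx = T.lmatrix()   # stands in for edge_sorted_indices: [batch * n, d * k]
    gathered = vc[T.arange(vc.shape[0]), idx.T, :]
    f = theano.function([vc, idx], gathered.shape)
    shp = f(np.zeros((6, 4, 3)), np.zeros((6, 2), dtype='int64'))
    assert tuple(shp) == (2, 6, 3)  # [d * k, batch * n, channel]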
def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2),
             mid=3):
    """
    Allocate a LeNetConvPoolLayer with shared variable internal parameters.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type input: theano.tensor.dtensor4
    :param input: symbolic image tensor, of shape image_shape

    :type filter_shape: tuple or list of length 4
    :param filter_shape: (number of filters, num input feature maps,
                          filter height, filter width)

    :type image_shape: tuple or list of length 4
    :param image_shape: (batch size, num input feature maps,
                         image height, image width)

    :type poolsize: tuple or list of length 2
    :param poolsize: the downsampling (pooling) factor (#rows, #cols)

    :type mid: int
    :param mid: rank, within each sorted pooling window, of the value kept
                by the rank-based pooling below
    """
    assert image_shape[1] == filter_shape[1]
    self.input = input

    # there are "num input feature maps * filter height * filter width"
    # inputs to each hidden unit
    fan_in = numpy.prod(filter_shape[1:])
    # each unit in the lower layer receives a gradient from:
    # "num output feature maps * filter height * filter width" / pooling size
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
               numpy.prod(poolsize))
    # initialize weights with random weights
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))
    self.W = theano.shared(numpy.asarray(
        rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
        dtype=theano.config.floatX), borrow=True)

    # the bias is a 1D tensor -- one bias per output feature map
    b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
    self.b = theano.shared(value=b_values, borrow=True)

    # convolve input feature maps with filters
    conv_out = conv.conv2d(input=input, filters=self.W,
                           filter_shape=filter_shape,
                           image_shape=image_shape)

    # downsample each feature map individually with rank-based pooling
    # rather than max pooling:
    # pooled_out = downsample.max_pool_2d(input=conv_out,
    #                                     ds=poolsize, ignore_border=True)
    # or mean pooling:
    # pooled_out = temp.mean(axis=-1)
    # pooled_out = pylearn2.models.mlp.mean_pool(
    #     bc01=conv_out, pool_shape=poolsize, pool_stride=poolsize,
    #     image_shape=(image_shape[2] - filter_shape[2] + 1,
    #                  image_shape[3] - filter_shape[3] + 1))
    # Lay each pooling window out as a row, sort it, and keep the value of
    # rank `mid` in every window.
    temp = theano.sandbox.neighbours.images2neibs(conv_out, poolsize)
    ords = argsort(temp)
    num = (image_shape[0] * filter_shape[0] *
           ((image_shape[2] - filter_shape[2] + 1) // poolsize[0]) *
           ((image_shape[3] - filter_shape[3] + 1) // poolsize[1]))
    largest = temp[numpy.arange(0, num), ords[:, mid]]
    pooled_out = largest.reshape(
        (image_shape[0], filter_shape[0],
         (image_shape[2] - filter_shape[2] + 1) // poolsize[0],
         (image_shape[3] - filter_shape[3] + 1) // poolsize[1]))

    # add the bias term. Since the bias is a vector (1D array), we first
    # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
    # thus be broadcasted across mini-batches and feature map width & height
    self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

    # store parameters of this layer
    self.params = [self.W, self.b]
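# Hedged toy sketch of the rank-based pooling above: images2neibs lays each
# pooling window out as a row, argsort ranks each row, and column `mid` of
# the ranks selects the (mid + 1)-th smallest value per window; with
# mid == window_size - 1 this would recover ordinary max pooling.
def demo_rank_pooling():
    import numpy as np
    import theano
    import theano.tensor as T
    from theano.sandbox.neighbours import images2neibs
    from theano.tensor.sort import argsort

    images = T.dtensor4()
    neibs = images2neibs(images, neib_shape=(2, 2))  # [num_windows, 4]
    ords = argsort(neibs)                            # within-window ranks
    mid = 2
    picked = neibs[T.arange(neibs.shape[0]), ords[:, mid]]
    f = theano.function([images], picked)
    x = np.arange(16, dtype='float64').reshape(1, 1, 4, 4)
    assert np.array_equal(f(x), [4.0, 6.0, 12.0, 14.0])  # 3rd smallest per window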
def argsort(self, axis=-1, kind='quicksort', order=None):
    """See `theano.tensor.sort.argsort`."""
    from theano.tensor.sort import argsort
    return argsort(self, axis, kind, order)
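# Usage note (a hedged sketch): with this method installed on tensor
# variables, x.argsort(...) builds the same ArgSortOp node as the
# module-level function, consistent with the Op equality asserted in
# Example 5 of test_argsort above.
def demo_method_equals_function():
    import theano.tensor as T
    from theano.tensor.sort import argsort as argsort_fn

    x = T.dmatrix('x')
    assert x.argsort(axis=0).owner.op == argsort_fn(x, axis=0).owner.op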