def test_convolutional_layer():
    x = tensor.tensor4('x')
    num_channels = 4
    batch_size = 5
    pooling_size = 3
    num_filters = 3
    filter_size = (3, 3)
    activation = Rectifier().apply

    conv = ConvolutionalLayer(activation, filter_size, num_filters,
                              (pooling_size, pooling_size),
                              num_channels, image_size=(17, 13),
                              batch_size=batch_size,
                              weights_init=Constant(1.),
                              biases_init=Constant(5.))
    conv.initialize()

    y = conv.apply(x)
    func = function([x], y)

    x_val = numpy.ones((batch_size, num_channels, 17, 13),
                       dtype=theano.config.floatX)
    assert_allclose(func(x_val),
                    numpy.prod(filter_size) * num_channels *
                    numpy.ones((batch_size, num_filters, 5, 4)) + 5)
    assert_equal(conv.convolution.batch_size, batch_size)
    assert_equal(conv.pooling.batch_size, batch_size)
def test_convolutional_layer():
    x = tensor.tensor4('x')
    num_channels = 4
    batch_size = 5
    pooling_size = 3
    num_filters = 3
    filter_size = (3, 3)
    activation = Rectifier().apply

    conv = ConvolutionalLayer(activation, filter_size, num_filters,
                              (pooling_size, pooling_size),
                              num_channels, image_size=(17, 13),
                              weights_init=Constant(1.),
                              biases_init=Constant(5.))
    conv.initialize()

    y = conv.apply(x)
    func = function([x], y)

    x_val = numpy.ones((batch_size, num_channels, 17, 13),
                       dtype=theano.config.floatX)
    assert_allclose(
        func(x_val),
        numpy.prod(filter_size) * num_channels *
        numpy.ones((batch_size, num_filters, 5, 4)) + 5)
def test_convolutional_layer_use_bias():
    act = ConvolutionalLayer(Rectifier().apply, (3, 3), 5, (2, 2), 6,
                             image_size=(9, 9), use_bias=False)
    act.allocate()
    assert not act.convolution.use_bias
    assert len(ComputationGraph([act.apply(tensor.tensor4())]).parameters) == 1
def test_convolutional_sequence():
    x = tensor.tensor4('x')
    num_channels = 4
    pooling_size = 3
    batch_size = 5
    activation = Rectifier().apply

    conv = ConvolutionalLayer(activation, (3, 3), 5,
                              (pooling_size, pooling_size),
                              weights_init=Constant(1.),
                              biases_init=Constant(5.))
    conv2 = ConvolutionalActivation(activation, (2, 2), 4,
                                    weights_init=Constant(1.))

    seq = ConvolutionalSequence([conv, conv2], num_channels,
                                image_size=(17, 13))
    seq.push_allocation_config()
    assert conv.num_channels == 4
    assert conv2.num_channels == 5
    conv2.convolution.use_bias = False
    y = seq.apply(x)
    seq.initialize()
    func = function([x], y)

    x_val = numpy.ones((batch_size, 4, 17, 13),
                       dtype=theano.config.floatX)
    y_val = (numpy.ones((batch_size, 4, 4, 3)) *
             (9 * 4 + 5) * 4 * 5)
    assert_allclose(func(x_val), y_val)
def main():
    # Gradient ascent on the input image: maximize a single unit of the
    # convolutional output while penalizing the squared norm of the input,
    # then display the resulting image.
    initial = numpy.random.normal(0, 0.1, (1, 1, 200, 200))
    x = theano.shared(initial)

    conv_layer = ConvolutionalLayer(
        Rectifier().apply, (16, 16), 9, (4, 4), 1)
    conv_layer2 = ConvolutionalLayer(
        Rectifier().apply, (7, 7), 9, (2, 2), 1)

    con_seq = ConvolutionalSequence([conv_layer], 1,
                                    image_size=(200, 200),
                                    weights_init=IsotropicGaussian(0.1),
                                    biases_init=Constant(0.))
    con_seq.initialize()

    out = con_seq.apply(x)
    target_out = out[0, 0, 1, 1]

    grad = theano.grad(target_out - .1 * (x ** 2).sum(), x)
    updates = {x: x + 5e-1 * grad}
    # x.set_value(numpy.ones((1, 1, 200, 200)))
    # print theano.function([], out)()
    make_step = theano.function([], target_out, updates=updates)

    for i in xrange(400):
        out_val = make_step()
        print i, out_val

    image = x.get_value()[0][0]
    image = (image - image.mean()) / image.std()
    image = numpy.array([image, image, image]).transpose(1, 2, 0)
    plt.imshow(numpy.cast['uint8'](image * 65. + 128.),
               interpolation='none')
    plt.show()
def __init__(self, conv_activations, num_channels, image_shape,
             filter_sizes, feature_maps, pooling_sizes,
             top_mlp_activations, top_mlp_dims,
             conv_step=None, border_mode='valid', **kwargs):
    if conv_step is None:
        self.conv_step = (1, 1)
    else:
        self.conv_step = conv_step
    self.num_channels = num_channels
    self.image_shape = image_shape
    self.top_mlp_activations = top_mlp_activations
    self.top_mlp_dims = top_mlp_dims
    self.border_mode = border_mode

    parameters = zip(conv_activations, filter_sizes, feature_maps,
                     pooling_sizes)

    # Construct convolutional layers with corresponding parameters
    self.layers = [
        ConvolutionalLayer(filter_size=filter_size,
                           num_filters=num_filter,
                           pooling_size=pooling_size,
                           activation=activation.apply,
                           conv_step=self.conv_step,
                           border_mode=self.border_mode,
                           name='conv_pool_{}'.format(i))
        for i, (activation, filter_size, num_filter, pooling_size)
        in enumerate(parameters)]

    self.conv_sequence = ConvolutionalSequence(self.layers, num_channels,
                                               image_size=image_shape)

    # Construct a top MLP
    self.top_mlp = MLP(top_mlp_activations, top_mlp_dims)

    # We need to flatten the output of the last convolutional layer.
    # This brick accepts a tensor of dimension (batch_size, ...) and
    # returns a matrix (batch_size, features)
    self.flattener = Flattener()
    application_methods = [self.conv_sequence.apply, self.flattener.apply,
                           self.top_mlp.apply]
    super(LeNet, self).__init__(application_methods, **kwargs)
def test_border_mode_not_pushed():
    layers = [Convolutional(border_mode='full'),
              ConvolutionalActivation(Rectifier().apply),
              ConvolutionalActivation(Rectifier().apply,
                                      border_mode='valid'),
              ConvolutionalLayer(Rectifier().apply, border_mode='full')]
    stack = ConvolutionalSequence(layers)
    stack.push_allocation_config()
    assert stack.children[0].border_mode == 'full'
    assert stack.children[1].border_mode == 'valid'
    assert stack.children[2].border_mode == 'valid'
    assert stack.children[3].border_mode == 'full'

    stack2 = ConvolutionalSequence(layers, border_mode='full')
    stack2.push_allocation_config()
    assert stack2.children[0].border_mode == 'full'
    assert stack2.children[1].border_mode == 'full'
    assert stack2.children[2].border_mode == 'full'
    assert stack2.children[3].border_mode == 'full'
import numpy as np
from theano import tensor as T

from blocks.bricks import Rectifier
from blocks.bricks.conv import ConvolutionalLayer, ConvolutionalSequence
from blocks.initialization import IsotropicGaussian, Constant, Uniform
from blocks.roles import WEIGHT, FILTER, INPUT
from blocks.graph import ComputationGraph, apply_dropout

batch_size = 128
filter_size = 3
num_filters = 4
initial_weight_std = .01
epochs = 5

x = T.tensor4('features')
y = T.lmatrix('targets')

# Convolutional layers
conv_layers = [
    ConvolutionalLayer(Rectifier().apply, (3, 3), 16, (2, 2), name='l1'),
    ConvolutionalLayer(Rectifier().apply, (3, 3), 32, (2, 2), name='l2')]

convnet = ConvolutionalSequence(conv_layers, num_channels=1,
                                image_size=(28, 28),
                                weights_init=IsotropicGaussian(0.1),
                                biases_init=Constant(0))
convnet.initialize()

output_dim = np.prod(convnet.get_dim('output'))
print(output_dim)

# Fully connected layers
def run_experiment():
    # Sanity check: compare per-example squared gradient norms computed in a
    # single pass over the mini-batch against the values obtained by running
    # the same function on one example at a time.
    np.random.seed(42)

    X = tensor.tensor4('features')

    nbr_channels = 3
    image_shape = (5, 5)

    conv_layers = [ConvolutionalLayer(filter_size=(2, 2),
                                      num_filters=10,
                                      activation=Rectifier().apply,
                                      border_mode='valid',
                                      pooling_size=(1, 1),
                                      weights_init=Uniform(width=0.1),
                                      # biases_init=Uniform(width=0.01),
                                      biases_init=Constant(0.0),
                                      name='conv0')]
    conv_sequence = ConvolutionalSequence(conv_layers,
                                          num_channels=nbr_channels,
                                          image_size=image_shape)
    # conv_sequence.push_allocation_config()
    conv_sequence.initialize()

    flattener = Flattener()
    conv_output = conv_sequence.apply(X)
    y_hat = flattener.apply(conv_output)
    # Not important since we're not going to actually train anything.
    cost = tensor.sqr(y_hat).sum()

    # L_grads_method_02 = [tensor.grad(cost, v) for v in VariableFilter(
    #     roles=[FILTER, BIAS])(ComputationGraph([y_hat]).variables)]
    L_grads_method_02 = [
        tensor.grad(cost, v) for v in
        VariableFilter(roles=[BIAS])(ComputationGraph([y_hat]).variables)]

    # Works on the sum of the gradients in a mini-batch.
    sum_square_norm_gradients_method_02 = sum(
        [tensor.sqr(g).sum() for g in L_grads_method_02])

    D_by_layer = get_conv_layers_transformation_roles(
        ComputationGraph(conv_output))
    individual_sum_square_norm_gradients_method_00 = (
        get_sum_square_norm_gradients_conv_transformations(
            D_by_layer, cost))

    # why does this thing depend on N again ?
    # I don't think I've used a cost that divides by N.
    N = 2
    Xtrain = np.random.randn(N, nbr_channels,
                             image_shape[0],
                             image_shape[1]).astype(np.float32)
    # Xtrain[1:, :, :, :] = 0.0
    Xtrain[:, :, :, :] = 1.0

    convolution_filter_variable = VariableFilter(roles=[FILTER])(
        ComputationGraph([y_hat]).variables)[0]
    convolution_filter_variable_value = convolution_filter_variable.get_value()
    convolution_filter_variable_value[:, :, :, :] = 1.0
    # convolution_filter_variable_value[0, 0, :, :] = 1.0
    convolution_filter_variable.set_value(convolution_filter_variable_value)

    f = theano.function([X],
                        [cost,
                         individual_sum_square_norm_gradients_method_00,
                         sum_square_norm_gradients_method_02])

    [c, v0, gs2] = f(Xtrain)
    # print "[c, v0, gs2]"

    L_c, L_v0, L_gs2 = ([], [], [])
    for n in range(N):
        [nc, nv0, ngs2] = f(Xtrain[n, :, :, :].reshape(
            (1, Xtrain.shape[1], Xtrain.shape[2], Xtrain.shape[3])))
        L_c.append(nc)
        L_v0.append(nv0)
        L_gs2.append(ngs2)

    print "Cost for whole mini-batch in single shot : %f." % c
    print "Cost for whole mini-batch accumulated : %f." % sum(L_c)
    print ""
    print "Square-norm of all gradients for each data point in single shot :"
    print v0.reshape((1, -1))
    print "Square-norm of all gradients for each data point iteratively :"
    print np.array(L_gs2).reshape((1, -1))
    print ""
    print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs2)))
    print ""
    print "Ratios : "
    print np.array(L_gs2).reshape((1, -1)) / v0.reshape((1, -1))
from theano import tensor

from blocks.bricks import MLP, Rectifier
# from blocks.bricks.cost import CategoricalCrossEntropy
from blocks.bricks.conv import (ConvolutionalLayer, ConvolutionalSequence,
                                Flattener)
from blocks.initialization import Uniform, Constant

x = tensor.tensor4('images')
y = tensor.lmatrix('targets')

# Convolutional layers
filter_sizes = [(5, 5)] * 3 + [(4, 4)] * 3
num_filters = [32, 32, 64, 64, 128, 256]
pooling_sizes = [(2, 2)] * 6
activation = Rectifier().apply
conv_layers = [
    ConvolutionalLayer(activation, filter_size, num_filters_, pooling_size)
    for filter_size, num_filters_, pooling_size
    in zip(filter_sizes, num_filters, pooling_sizes)]
convnet = ConvolutionalSequence(conv_layers, num_channels=3,
                                image_size=(260, 260),
                                weights_init=Uniform(0, 0.2),
                                biases_init=Constant(0.))
convnet.initialize()

# Fully connected layers
features = Flattener().apply(convnet.apply(x))

mlp = MLP(activations=[Rectifier(), None],
          dims=[256, 256, 2],
def run_experiment():
    np.random.seed(42)

    # X = tensor.matrix('features')
    X = tensor.tensor4('features')
    y = tensor.matrix('targets')

    nbr_channels = 3
    image_shape = (30, 30)

    conv_layers = [ConvolutionalLayer(filter_size=(4, 4),
                                      num_filters=10,
                                      activation=Rectifier().apply,
                                      border_mode='full',
                                      pooling_size=(1, 1),
                                      weights_init=Uniform(width=0.1),
                                      biases_init=Constant(0.0),
                                      name='conv0'),
                   ConvolutionalLayer(filter_size=(3, 3),
                                      num_filters=14,
                                      activation=Rectifier().apply,
                                      border_mode='full',
                                      pooling_size=(1, 1),
                                      weights_init=Uniform(width=0.1),
                                      biases_init=Constant(0.0),
                                      name='conv1')]
    conv_sequence = ConvolutionalSequence(conv_layers,
                                          num_channels=nbr_channels,
                                          image_size=image_shape)
    # conv_sequence.push_allocation_config()
    conv_sequence.initialize()
    conv_output_dim = np.prod(conv_sequence.get_dim('output'))
    # conv_output_dim = 25*25

    flattener = Flattener()
    mlp = MLP(activations=[Rectifier(), Rectifier(), Softmax()],
              dims=[conv_output_dim, 50, 50, 10],
              weights_init=IsotropicGaussian(std=0.1),
              biases_init=IsotropicGaussian(std=0.01))
    mlp.initialize()

    conv_output = conv_sequence.apply(X)
    y_hat = mlp.apply(flattener.apply(conv_output))
    cost = CategoricalCrossEntropy().apply(y, y_hat)
    # cost = CategoricalCrossEntropy().apply(y_hat, y)
    # cost = BinaryCrossEntropy().apply(y.flatten(), y_hat.flatten())

    cg = ComputationGraph([y_hat])

    """
    print "--- INPUT ---"
    for v in VariableFilter(bricks=mlp.linear_transformations,
                            roles=[INPUT])(cg.variables):
        print v.tag.annotations[0].name

    print "--- OUTPUT ---"
    for v in VariableFilter(bricks=mlp.linear_transformations,
                            roles=[OUTPUT])(cg.variables):
        print v.tag.annotations[0].name

    print "--- WEIGHT ---"
    for v in VariableFilter(bricks=mlp.linear_transformations,
                            roles=[WEIGHT])(cg.variables):
        print v.tag.annotations[0].name

    print "--- BIAS ---"
    for v in VariableFilter(bricks=mlp.linear_transformations,
                            roles=[BIAS])(cg.variables):
        print v.tag.annotations[0].name
    """

    # Check out .tag on the variables to see which layer they belong to.
    print "----------------------------"

    D_by_layer = get_linear_transformation_roles(mlp, cg)
    # Returns a vector with one entry for each example in the mini-batch.
    individual_sum_square_norm_gradients_method_00 = (
        get_sum_square_norm_gradients_linear_transformations(
            D_by_layer, cost))

    # import pprint
    # pp = pprint.PrettyPrinter(indent=4)
    # pp.pprint(get_conv_layers_transformation_roles(
    #     ComputationGraph(conv_output)).items())

    D_by_layer = get_conv_layers_transformation_roles(
        ComputationGraph(conv_output))
    individual_sum_square_norm_gradients_method_00 += (
        get_sum_square_norm_gradients_conv_transformations(
            D_by_layer, cost))

    print "There are %d entries in cg.parameters." % len(cg.parameters)
    L_grads_method_01 = [tensor.grad(cost, p) for p in cg.parameters]
    L_grads_method_02 = [
        tensor.grad(cost, v) for v in
        VariableFilter(roles=[WEIGHT, BIAS])(cg.variables)]

    # Works on the sum of the gradients in a mini-batch.
    sum_square_norm_gradients_method_01 = sum(
        [tensor.sqr(g).sum() for g in L_grads_method_01])
    sum_square_norm_gradients_method_02 = sum(
        [tensor.sqr(g).sum() for g in L_grads_method_02])

    N = 8
    Xtrain = np.random.randn(N, nbr_channels,
                             image_shape[0],
                             image_shape[1]).astype(np.float32)

    # Option 1.
    ytrain = np.zeros((N, 10), dtype=np.float32)
    for n in range(N):
        label = np.random.randint(low=0, high=10)
        ytrain[n, label] = 1.0

    # Option 2, just to debug situations with NaN.
    # ytrain = np.random.rand(N, 10).astype(np.float32)
    # for n in range(N):
    #     ytrain[n, :] = ytrain[n, :] / ytrain[n, :].sum()

    f = theano.function([X, y],
                        [cost,
                         individual_sum_square_norm_gradients_method_00,
                         sum_square_norm_gradients_method_01,
                         sum_square_norm_gradients_method_02])

    [c, v0, gs1, gs2] = f(Xtrain, ytrain)
    # print "[c, v0, gs1, gs2]"

    L_c, L_v0, L_gs1, L_gs2 = ([], [], [], [])
    for n in range(N):
        [nc, nv0, ngs1, ngs2] = f(
            Xtrain[n, :].reshape((1, Xtrain.shape[1],
                                  Xtrain.shape[2], Xtrain.shape[3])),
            ytrain[n, :].reshape((1, 10)))
        L_c.append(nc)
        L_v0.append(nv0)
        L_gs1.append(ngs1)
        L_gs2.append(ngs2)

    print "Cost for whole mini-batch in single shot : %f." % c
    print "Cost for whole mini-batch accumulated : %f." % sum(L_c)
    print ""
    print "Square-norm of all gradients for each data point in single shot :"
    print v0.reshape((1, -1))
    print "Square-norm of all gradients for each data point iteratively :"
    print np.array(L_gs1).reshape((1, -1))
    print "Square-norm of all gradients for each data point iteratively :"
    print np.array(L_gs2).reshape((1, -1))
    print ""
    print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs1)))
    print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs2)))
    print ""
    print "Ratios : "
    print np.array(L_gs1).reshape((1, -1)) / v0.reshape((1, -1))