def test_convnet(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512, 20],filter_shape=[9,5], batch_size=200, verbose=True): """ Wrapper function for testing Multi-Stage ConvNet on SVHN dataset :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type nkerns: list of ints :param nkerns: number of kernels on each layer :type batch_size: int :param batch_szie: number of examples in minibatch. :type verbose: boolean :param verbose: to print out epoch summary or not to. """ rng = numpy.random.RandomState(23455) datasets = load_data() train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches //= batch_size n_valid_batches //= batch_size n_test_batches //= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ###################### # BUILD ACTUAL MODEL # ###################### print('... building the model') # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32) # to a 4D tensor, compatible with our LeNetConvPoolLayer layer0_input = x.reshape((batch_size, 3, 32, 32)) # TODO: Construct the first convolutional pooling layer: layer0 = LeNetConvPoolLayer( rng, input=layer0_input, # (batch size, num input feature maps,image height, image width) image_shape=(batch_size,3,32,32), # number of filters, num input feature maps,filter height, filter width) filter_shape=(nkerns[0],3,filter_shape[0],filter_shape[0]), poolsize=(2,2) ) # TODO: Construct the second convolutional pooling layer layer1 = LeNetConvPoolLayer( rng, input=layer0.output, # (32-9+1)/2 = 12 image_shape=(batch_size,nkerns[0],(33-filter_shape[0])/2,(33-filter_shape[0])/2), filter_shape=(nkerns[1],nkerns[0],filter_shape[1],filter_shape[1]), poolsize=(2,2) ) # Combine Layer 0 output and Layer 1 output # TODO: downsample the first layer output to match the size of the second # layer output. layer0_output_ds = downsample.max_pool_2d( # nkerns[0] 12 x 12 # nkerns[1] 4 x 4 input=layer0.output, ds=(3,3), # TDOD: change ds ignore_border=False ) # concatenate layer layer2_input = T.concatenate([layer1.output, layer0_output_ds], axis=1) filter_shape_2 = ((33-filter_shape[0])/2 - filter_shape[1]+1)/2 # TODO: Construct the third convolutional pooling layer layer2 = LeNetConvPoolLayer( rng, input=layer2_input, # (12-5+1)/2 = 4 image_shape=(batch_size,nkerns[1]+nkerns[0],filter_shape_2,filter_shape_2), #TODO filter_shape=(nkerns[2],nkerns[1]+nkerns[0],filter_shape_2,filter_shape_2), #TODO poolsize= (1,1)#TODO ) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size, num_pixels) (i.e matrix of rasterized images). # This will generate a matrix of shape (batch_size, nkerns[2] * 1 * 1). 
layer3_input = layer2.output.flatten(2) # construct a fully-connected sigmoidal layer layer3 = HiddenLayer( rng, input=layer3_input, n_in=nkerns[2] * 1 * 1, n_out= 10,#TODO, activation=T.nnet.sigmoid ) # classify the values of the fully-connected sigmoidal layer layer4 = LogisticRegression(input=layer3.output, n_in= 10,#TODO n_out=10) # the cost we minimize during training is the NLL of the model cost = layer4.negative_log_likelihood(y) # create a function to compute the mistakes that are made by the model test_model = theano.function( [index], layer4.errors(y), givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size], y: test_set_y[index * batch_size: (index + 1) * batch_size] } ) validate_model = theano.function( [index], layer4.errors(y), givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size], y: valid_set_y[index * batch_size: (index + 1) * batch_size] } ) # TODO: create a list of all model parameters to be fit by gradient descent params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i], grads[i]) pairs. updates = [ (param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads) ] train_model = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size] } ) ############### # TRAIN MODEL # ############### print('... training') train_nn(train_model, validate_model, test_model, n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
def test_CDNN(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512], batch_size=200, n_hidden=[200,200,200], verbose=True): """ Wrapper function for testing CNN in cascade with DNN """ rng = numpy.random.RandomState(23455) datasets = load_data() train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches //= batch_size n_valid_batches //= batch_size n_test_batches //= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ###################### # BUILD ACTUAL MODEL # ###################### print('... building the model') # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32) # to a 4D tensor, compatible with our LeNetConvPoolLayer layer0_input = x.reshape((batch_size, 3, 32, 32)) # TODO: Construct the first convolutional pooling layer layer0 = LeNetConvPoolLayer( rng, input=layer0_input, # (batch size, num input feature maps,image height, image width) image_shape=(batch_size,3,32,32), # number of filters, num input feature maps,filter height, filter width) filter_shape=(nkerns[0],3,5,5), poolsize=(2,2) ) # TODO: Construct the second convolutional pooling layer layer1 = LeNetConvPoolLayer( rng, input=layer0.output, # (32-5+1)/2 image_shape=(batch_size,nkerns[0],14,14), filter_shape=(nkerns[1],nkerns[0],5,5), poolsize=(2,2) ) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size, num_pixels) (i.e matrix of rasterized images). layer2_input = layer1.output.flatten(2) layer2 = HiddenLayer( rng, input=layer2_input, n_in=nkerns[1] * 5 * 5, n_out= n_hidden[0],#TODO, activation=T.nnet.sigmoid ) layer3 = HiddenLayer( rng, input=layer2.output, n_in=n_hidden[0], n_out=n_hidden[1],#TODO, activation=T.nnet.sigmoid ) layer4 = HiddenLayer( rng, input=layer3.output, n_in=n_hidden[1], n_out=n_hidden[2],#TODO, activation=T.nnet.sigmoid ) layer5 = LogisticRegression( input=layer4.output, n_in=n_hidden[2], n_out=10 ) # the cost we minimize during training is the NLL of the model cost = layer5.negative_log_likelihood(y) # create a function to compute the mistakes that are made by the model test_model = theano.function( [index], layer5.errors(y), givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size], y: test_set_y[index * batch_size: (index + 1) * batch_size] } ) validate_model = theano.function( [index], layer5.errors(y), givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size], y: valid_set_y[index * batch_size: (index + 1) * batch_size] } ) # TODO: create a list of all model parameters to be fit by gradient descent params = layer5.params + layer4.params + layer3.params + layer2.params + layer1.params + layer0.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i], grads[i]) pairs. 
updates = [ (param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads) ] train_model = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size] } ) ############### # TRAIN MODEL # ############### print('... training') train_nn(train_model, validate_model, test_model, n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
def loadModel(imageArray,validSet, learning_rate=0.1, n_epochs=1000, nkerns=[16, 512, 20], batch_size=200, verbose=True): rng = numpy.random.RandomState(23455) #datasets = loadData(shared=False) #train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = validSet #test_set_x, test_set_y = datasets[2] valid_set_x[0] = imageArray valid_set_x, valid_set_y = shared_dataset([valid_set_x, valid_set_y]) # compute number of minibatches for training, validation and testing n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ###################### # BUILD ACTUAL MODEL # ###################### #Make learning rate a theano shared variable # Reshape matrix of rasterized images of shape (batch_size, 1 * 48 * 48) # to a 4D tensor, compatible with our LeNetConvPoolLayer layer0_input = x.reshape((batch_size, 1, 48, 48)) # TODO: Construct the first convolutional pooling layer: layer0 = LeNetConvPoolLayer( rng, input=layer0_input, image_shape= (batch_size, 1, 48, 48), filter_shape= (nkerns[0],1,3,3), poolsize= (2,2) ) # TODO: Construct the second convolutional pooling layer layer1 = LeNetConvPoolLayer( rng, input=layer0.output, image_shape= (batch_size, nkerns[0], 23, 23) , filter_shape= (nkerns[1],nkerns[0],4,4), poolsize= (2,2) ) # TODO: Construct the third convolutional pooling layer layer2 = LeNetConvPoolLayer( rng, input=layer1.output, image_shape= (batch_size,nkerns[1],10,10), filter_shape= (nkerns[2],nkerns[1],3,3), poolsize= (2,2) ) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size, num_pixels) (i.e matrix of rasterized images). # This will generate a matrix of shape (batch_size, nkerns[2] * 1 * 1). layer3_input = layer2.output.flatten(2) # construct a fully-connected sigmoidal layer layer3 = HiddenLayer( rng, input=layer3_input, n_in=nkerns[2] * 4 * 4, n_out= batch_size, activation=T.tanh ) # classify the values of the fully-connected sigmoidal layer layer4 = LogisticRegression(input=layer3.output, n_in= batch_size, n_out=7) getPofYGivenX = theano.function( [index], layer4.pOfYGivenX(), givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size], y: valid_set_y[index * batch_size: (index + 1) * batch_size] }, on_unused_input='ignore' ) #Load the saved parameters f = open('layer0.W','rb') layer0.W.set_value(cPickle.load(f)) f.close() f = open('layer0.b','rb') layer0.b.set_value(cPickle.load(f)) f.close() f = open('layer1.W','rb') layer1.W.set_value(cPickle.load(f)) f.close() f = open('layer1.b','rb') layer1.b.set_value(cPickle.load(f)) f.close() f = open('layer2.W','rb') layer2.W.set_value(cPickle.load(f)) f.close() f = open('layer2.b','rb') layer2.b.set_value(cPickle.load(f)) f.close() f = open('layer3.W','rb') layer3.W.set_value(cPickle.load(f)) f.close() f = open('layer3.b','rb') layer3.b.set_value(cPickle.load(f)) f.close() f = open('layer4.W','rb') layer4.W.set_value(cPickle.load(f)) f.close() f = open('layer4.b','rb') layer4.b.set_value(cPickle.load(f)) f.close() predictedList = getPofYGivenX(0) predictedMoods = predictedList[0].tolist() return [predictedMoods.index(max(predictedMoods)),max(predictedMoods)]
def test_gaussian(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512], batch_size=200, verbose=False): """ Wrapper function for testing LeNet on SVHN dataset :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type nkerns: list of ints :param nkerns: number of kernels on each layer :type batch_size: int :param batch_szie: number of examples in minibatch. :type verbose: boolean :param verbose: to print out epoch summary or not to. """ rng = numpy.random.RandomState(23455) datasets = load_data() train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches //= batch_size n_valid_batches //= batch_size n_test_batches //= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ###################### # BUILD ACTUAL MODEL # ###################### print('... building the model') # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32) # to a 4D tensor, compatible with our LeNetConvPoolLayer layer0_input = x.reshape((batch_size, 3, 32, 32)) # TODO: Construct the first convolutional pooling layer # Construct the first convolutional pooling layer: # filtering reduces the image size to (32-5+1 , 32-5+1) = (28, 28) # maxpooling reduces this further to (28/2, 28/2) = (14, 14) # 4D output tensor is thus of shape (batch_size, nkerns[0], 14, 14) layer0 = LeNetConvPoolLayer( rng, input=layer0_input, image_shape=(batch_size, 3, 32, 32), filter_shape=(nkerns[0], 3, 5, 5), poolsize=(2, 2) ) # TODO: Construct the second convolutional pooling layer # Construct the second convolutional pooling layer # filtering reduces the image size to (14-5+1, 14-5+1) = (10, 10) # maxpooling reduces this further to (10/2, 10/2) = (5, 5) # 4D output tensor is thus of shape (batch_size, nkerns[1], 5, 5) layer1 = LeNetConvPoolLayer( rng, input=layer0.output, image_shape=(batch_size, nkerns[0], 14, 14), filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2) ) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size, num_pixels) (i.e matrix of rasterized images). 
layer2_input = layer1.output.flatten(2) # TODO: construct a fully-connected sigmoidal layer layer2 = HiddenLayer( rng, input=layer2_input, n_in=nkerns[1] * 5 * 5, n_out=500, activation=T.tanh ) # TODO: classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression( input=layer2.output, n_in=500, n_out=10) # the cost we minimize during training is the NLL of the model cost = layer3.negative_log_likelihood(y) # create a function to compute the mistakes that are made by the model test_model = theano.function( [index], layer3.errors(y), givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size], y: test_set_y[index * batch_size: (index + 1) * batch_size] } ) validate_model = theano.function( [index], layer3.errors(y), givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size], y: valid_set_y[index * batch_size: (index + 1) * batch_size] } ) # TODO: create a list of all model parameters to be fit by gradient descent params = layer3.params + layer2.params + layer1.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i], grads[i]) pairs. updates = [ (param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads) ] layer0.W = [make_Gaussian(size = 5), make_Gaussian(size = 5), make_Gaussian(size = 5)] layer0.b = numpy.zeros((nkerns[0],), dtype=theano.config.floatX) train_model = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size] } ) ############### # TRAIN MODEL # ############### print('... training') train_nn(train_model, validate_model, test_model, n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
def test_filter(learning_rate=0.1, n_epochs=1000, nkerns=[3, 512], batch_size=200, verbose=True): """ Wrapper function for testing LeNet on SVHN dataset :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type nkerns: list of ints :param nkerns: number of kernels on each layer :type batch_size: int :param batch_szie: number of examples in minibatch. :type verbose: boolean :param verbose: to print out epoch summary or not to. """ rng = numpy.random.RandomState(23455) datasets = load_data() train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches //= batch_size n_valid_batches //= batch_size n_test_batches //= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ###################### # BUILD ACTUAL MODEL # ###################### print('... building the model') # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32) # to a 4D tensor, compatible with our LeNetConvPoolLayer layer0_input = x.reshape((batch_size, 3, 32, 32)) # TODO: Construct the first convolutional pooling layer layer0 = LeNetConvPoolLayer( rng, input=layer0_input, # (batch size, num input feature maps,image height, image width) image_shape=(batch_size,3,32,32), # number of filters, num input feature maps,filter height, filter width) filter_shape=(nkerns[0],3,5,5), poolsize=(2,2) ) # TODO: Construct the second convolutional pooling layer layer1 = LeNetConvPoolLayer( rng, input=layer0.output, # (32-5+1)/2 image_shape=(batch_size,nkerns[0],14,14), filter_shape=(nkerns[1],nkerns[0],5,5), poolsize=(2,2) ) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size, num_pixels) (i.e matrix of rasterized images). 
layer2_input = layer1.output.flatten(2) # TODO: construct a fully-connected sigmoidal layer layer2 = HiddenLayer( rng, input=layer2_input, # (14-5+1)/2 n_in=nkerns[1] * 5 * 5, n_out=500, activation=T.nnet.sigmoid ) # TODO: classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression( input=layer2.output, n_in=500, n_out=10) # the cost we minimize during training is the NLL of the model cost = layer3.negative_log_likelihood(y) # create a function to compute the mistakes that are made by the model test_model = theano.function( [index], layer3.errors(y), givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size], y: test_set_y[index * batch_size: (index + 1) * batch_size] } ) validate_model = theano.function( [index], layer3.errors(y), givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size], y: valid_set_y[index * batch_size: (index + 1) * batch_size] } ) # TODO: create a list of all model parameters to be fit by gradient descent params = layer3.params + layer2.params + layer1.params + layer0.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i], grads[i]) pairs. updates = [ (param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads) ] train_model = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size] } ) ############### # TRAIN MODEL # ############### print('... 
training') train_nn(train_model, validate_model, test_model, n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose) mean_w_0 = layer0.W.get_value().mean() plt.figure() for knkerns0 in range(nkerns[0]): for kch in range(3): plt.subplot(3,3,knkerns0*3+kch+1) plt.imshow(layer0.W.get_value()[knkerns0,kch,:,:]) plt.title('trained filter') ########################################################################### ########################################################################### ########################################################################### filter_shape_input = (nkerns[0],3,5,5) pt_input = numpy.zeros((filter_shape_input[2],filter_shape_input[3])) pt_input[(filter_shape_input[2]-1)/2,(filter_shape_input[3]-1)/2]=1.0 W = numpy.zeros(filter_shape_input) from scipy.ndimage.filters import gaussian_filter as gf for knkerns0 in range(nkerns[0]): for kch in range(3): W[knkerns0,kch,:,:]=gf(pt_input,(knkerns0+1.0)) W[knkerns0,kch,:,:] = W[knkerns0,kch,:,:]/W[knkerns0,kch,:,:].mean()*mean_w_0 W = theano.shared(W,borrow=True) # TODO: Construct the first convolutional pooling layer layer0 = LeNetConvPoolLayer( rng, input=layer0_input, # (batch size, num input feature maps,image height, image width) image_shape=(batch_size,3,32,32), # number of filters, num input feature maps,filter height, filter width) filter_shape=filter_shape_input, poolsize=(2,2) ) layer0.W = W # TODO: Construct the second convolutional pooling layer layer1 = LeNetConvPoolLayer( rng, input=layer0.output, # (32-5+1)/2 image_shape=(batch_size,nkerns[0],14,14), filter_shape=(nkerns[1],nkerns[0],5,5), poolsize=(2,2) ) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size, num_pixels) (i.e matrix of rasterized images). layer2_input = layer1.output.flatten(2) # TODO: construct a fully-connected sigmoidal layer layer2 = HiddenLayer( rng, input=layer2_input, # (14-5+1)/2 n_in=nkerns[1] * 5 * 5, n_out=500, activation=T.nnet.sigmoid ) # TODO: classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression( input=layer2.output, n_in=500, n_out=10) # the cost we minimize during training is the NLL of the model cost = layer3.negative_log_likelihood(y) # create a function to compute the mistakes that are made by the model test_model = theano.function( [index], layer3.errors(y), givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size], y: test_set_y[index * batch_size: (index + 1) * batch_size] } ) validate_model = theano.function( [index], layer3.errors(y), givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size], y: valid_set_y[index * batch_size: (index + 1) * batch_size] } ) # TODO: create a list of all model parameters to be fit by gradient descent # the param of layer0 is excluded params = layer3.params + layer2.params + layer1.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i], grads[i]) pairs. 
updates = [ (param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads) ] train_model = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size] } ) ############### # TRAIN MODEL # ############### print('... training') train_nn(train_model, validate_model, test_model, n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose) plt.figure() for knkerns0 in range(nkerns[0]): for kch in range(3): plt.subplot(3,3,knkerns0*3+kch+1) plt.imshow(layer0.W.get_value()[knkerns0,kch,:,:]) plt.title('pre-defined filter')
def test_para_num(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512],L1_reg=0.00, L2_reg=0.0001, batch_size=128, n_hiddenLayers=2,verbose=True): """ Wrapper function for testing Multi-Stage ConvNet on SVHN dataset :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type nkerns: list of ints :param nkerns: number of kernels on each layer :type batch_size: int :param batch_szie: number of examples in minibatch. :type verbose: boolean :param verbose: to print out epoch summary or not to. """ rng = numpy.random.RandomState(23455) datasets = load_data() train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches //= batch_size n_valid_batches //= batch_size n_test_batches //= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ###################### # BUILD ACTUAL MODEL # ###################### print('... building the model') ########################################################################### ################################## CNN #################################### ########################################################################### # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32) # to a 4D tensor, compatible with our LeNetConvPoolLayer layer0_input = x.reshape((batch_size, 3, 32, 32)) # TODO: Construct the first convolutional pooling layer layer0 = LeNetConvPoolLayer( rng, input=layer0_input, # (batch size, num input feature maps,image height, image width) image_shape=(batch_size,3,32,32), # number of filters, num input feature maps,filter height, filter width) filter_shape=(nkerns[0],3,5,5), poolsize=(2,2) ) # TODO: Construct the second convolutional pooling layer layer1 = LeNetConvPoolLayer( rng, input=layer0.output, # (32-5+1)/2 image_shape=(batch_size,nkerns[0],14,14), filter_shape=(nkerns[1],nkerns[0],5,5), poolsize=(2,2) ) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size, num_pixels) (i.e matrix of rasterized images). 
layer2_input = layer1.output.flatten(2) # TODO: construct a fully-connected sigmoidal layer layer2 = HiddenLayer( rng, input=layer2_input, # (14-5+1)/2 n_in=nkerns[1] * 5 * 5, n_out=500, activation=T.nnet.sigmoid ) # TODO: classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression( input=layer2.output, n_in=500, n_out=10) # the cost we minimize during training is the NLL of the model cost = layer3.negative_log_likelihood(y) # create a function to compute the mistakes that are made by the model test_model = theano.function( [index], layer3.errors(y), givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size], y: test_set_y[index * batch_size: (index + 1) * batch_size] } ) validate_model = theano.function( [index], layer3.errors(y), givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size], y: valid_set_y[index * batch_size: (index + 1) * batch_size] } ) # TODO: create a list of all model parameters to be fit by gradient descent params = layer3.params + layer2.params + layer1.params + layer0.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i], grads[i]) pairs. updates = [ (param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads) ] train_model = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size] } ) ############### # TRAIN MODEL # ############### print('... training') train_nn(train_model, validate_model, test_model, n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose) ########################################################################### ################################## MLP #################################### ########################################################################### ###################### # BUILD ACTUAL MODEL # ###################### print('... building the model') n_hidden = [0,0]; n_hidden[0]=nkerns[0]*14*14 n_hidden[1]=nkerns[1]*5*5 # TODO: construct a neural network, either MLP or CNN. 
classifier = myMLP( rng=rng, input=x, n_in=32*32*3, n_hidden=n_hidden, n_hiddenLayers=n_hiddenLayers, n_out=10 ) # the cost we minimize during training is the negative log likelihood of # the model plus the regularization terms (L1 and L2); cost is expressed # here symbolically cost = ( classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr ) # compiling a Theano function that computes the mistakes that are made # by the model on a minibatch test_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] } ) validate_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] } ) # compute the gradient of cost with respect to theta (sotred in params) # the resulting gradients will be stored in a list gparams gparams = [T.grad(cost, param) for param in classifier.params] # specify how to update the parameters of the model as a list of # (variable, update expression) pairs # given two lists of the same length, A = [a1, a2, a3, a4] and # B = [b1, b2, b3, b4], zip generates a list C of same size, where each # element is a pair formed from the two lists : # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)] updates = [ (param, param - learning_rate * gparam) for param, gparam in zip(classifier.params, gparams) ] # compiling a Theano function `train_model` that returns the cost, but # in the same time updates the parameter of the model based on the rules # defined in `updates` train_model = theano.function( inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size] } ) ############### # TRAIN MODEL # ############### print('... training') train_nn(train_model, validate_model, test_model, n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
def test_dropout(learning_rate=0.1, n_epochs=1000, nkerns=[64, 128], batch_size=120, verbose=False): """ Wrapper function for testing LeNet on SVHN dataset :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type nkerns: list of ints :param nkerns: number of kernels on each layer :type batch_size: int :param batch_szie: number of examples in minibatch. :type verbose: boolean :param verbose: to print out epoch summary or not to. """ rng = numpy.random.RandomState(23455) datasets = load_data() train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches //= batch_size n_valid_batches //= batch_size n_test_batches //= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels testing = T.iscalar('testing') testValue = testing getTestValue = theano.function([testing], testValue) ###################### # BUILD ACTUAL MODEL # ###################### print('... building the model') # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32) # to a 4D tensor, compatible with our LeNetConvPoolLayer layer0_input = x.reshape((batch_size, 3, 32, 32)) # TODO: Construct the first convolutional pooling layer layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size, 3, 32, 32), filter_shape=(nkerns[0], 3, 5, 5), poolsize=(2, 2)) # TODO: Construct the second convolutional pooling layer layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, nkerns[0], 14, 14), filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2)) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size, num_pixels) (i.e matrix of rasterized images). 
layer2_input = layer1.output.flatten(2) # TODO: construct a fully-connected sigmoidal layer layer2 = DropOut(rng, input=layer2_input, n_in=nkerns[1] * 5 * 5, n_out=batch_size, testing=testing) # TODO: classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression(input=layer2.output, n_in=batch_size, n_out=10) # the cost we minimize during training is the NLL of the model cost = layer3.negative_log_likelihood(y) # create a function to compute the mistakes that are made by the model test_model = theano.function( [index], layer3.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size], testing: getTestValue(1) }, on_unused_input='ignore') validate_model = theano.function( [index], layer3.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size], testing: getTestValue(1) }, on_unused_input='ignore') # TODO: create a list of all model parameters to be fit by gradient descent params = layer3.params + layer2.params + layer1.params + layer0.params grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i], grads[i]) pairs. updates = [(param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads)] train_model = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size], testing: getTestValue(1) }, on_unused_input='ignore', allow_input_downcast=True) ############### # TRAIN MODEL # ############### print('... training') train_nn(train_model, validate_model, test_model, n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000, batch_size=20, n_hidden=500, verbose=True, fileName='predictionsMLP'): """ Wrapper function for testing LeNet on SVHN dataset :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type nkerns: list of ints :param nkerns: number of kernels on each layer :type batch_size: int :param batch_szie: number of examples in minibatch. :type verbose: boolean :param verbose: to print out epoch summary or not to. """ rng = numpy.random.RandomState(23455) datasets = load_data() train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches //= batch_size n_valid_batches //= batch_size n_test_batches //= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels learning_rate = theano.shared(learning_rate) testing = T.lscalar('testing') testValue = testing getTestValue = theano.function([testing], testValue) ###################### # BUILD ACTUAL MODEL # ###################### print('... building the model') # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32) # to a 4D tensor, compatible with our LeNetConvPoolLayer layer0_input = x.reshape((batch_size, 3, 32, 32)) layer0_input = layer0_input.flatten(2) # TODO: Construct the first convolutional pooling layer layer0 = HiddenLayer(rng, input=layer0_input, n_in=32 * 32 * 3, n_out=n_hidden, activation=T.tanh) layer1 = HiddenLayer(rng, input=layer0.output, n_in=n_hidden, n_out=n_hidden, activation=T.tanh) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size, num_pixels) (i.e matrix of rasterized images). 
# TODO: construct a fully-connected sigmoidal layer layer2 = DropConnect(rng, input=layer1.output, n_in=n_hidden, n_out=batch_size, testing=testing) # TODO: classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression(input=layer2.output, n_in=batch_size, n_out=10) # the cost we minimize during training is the NLL of the model cost = layer3.negative_log_likelihood(y) print("Model building complete") # create a function to compute the mistakes that are made by the model test_model = theano.function( [index], layer3.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size], testing: getTestValue(1) }, on_unused_input='ignore') getPredictedValue = theano.function( [index], layer3.predictedValue(), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size], testing: getTestValue(1) }, on_unused_input='ignore') validate_model = theano.function( [index], layer3.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size], testing: getTestValue(1) }, on_unused_input='ignore') # TODO: create a list of all model parameters to be fit by gradient descent params = layer3.params + layer2.params + layer1.params + layer0.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i], grads[i]) pairs. #updates = [ # (param_i, param_i - learning_rate * layer2.maskW.get_value() * grad_i) if (param_i.name == 'WDrop') else (param_i, param_i - learning_rate * layer2.maskb.get_value() * grad_i) if(param_i.name == 'bDrop') else (param_i, param_i - learning_rate * grad_i) # for param_i, grad_i in zip(params, grads) #] updates = [] momentum = 0.9 for param in params: param_update = theano.shared(param.get_value() * 0., broadcastable=param.broadcastable) if (param.name == 'WDrop'): updates.append((param, param - learning_rate.get_value().item() * layer2.maskW.get_value() * param_update)) elif (param.name == 'bDrop'): updates.append((param, param - learning_rate.get_value().item() * layer2.maskb.get_value() * param_update)) else: updates.append( (param, param - learning_rate.get_value().item() * param_update)) updates.append( (param_update, momentum * param_update + (1. - momentum) * T.grad(cost, param))) ''' updates = [ (param_i, param_i - learning_rate * grad_i) if ((param_i.name == 'WDrop') or (param_i.name == 'bDrop')) else (param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads) ] ''' print("Commpiling the train model function") train_model = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size], testing: getTestValue(0) }, on_unused_input='ignore', allow_input_downcast=True) ############### # TRAIN MODEL # ############### print('... training') predictions = train_nn(train_model, validate_model, test_model, getPredictedValue, n_train_batches, n_valid_batches, n_test_batches, n_epochs, learning_rate, verbose) f = open(fileName, 'wb') cPickle.dump(predictions, f, protocol=cPickle.HIGHEST_PROTOCOL) f.close()
def test_convnet(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512, 20], batch_size=200, verbose=False, filter_size=2): """ Wrapper function for testing Multi-Stage ConvNet on SVHN dataset :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type nkerns: list of ints :param nkerns: number of kernels on each layer :type batch_size: int :param batch_szie: number of examples in minibatch. :type verbose: boolean :param verbose: to print out epoch summary or not to. """ rng = numpy.random.RandomState(23455) datasets = load_data() train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches //= batch_size n_valid_batches //= batch_size n_test_batches //= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ###################### # BUILD ACTUAL MODEL # ###################### print('... building the model') # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32) # to a 4D tensor, compatible with our LeNetConvPoolLayer layer0_input = x.reshape((batch_size, 3, 32, 32)) # TODO: Construct the first convolutional pooling layer: layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size, 3, 32, 32), filter_shape=(nkerns[0], 3, filter_size, filter_size), poolsize=(2, 2)) # TODO: Construct the second convolutional pooling layer new_shape = (32 - filter_size + 1) // 2 layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, nkerns[0], new_shape, new_shape), filter_shape=(nkerns[1], nkerns[0], filter_size, filter_size), poolsize=(2, 2)) # Combine Layer 0 output and Layer 1 output # TODO: downsample the first layer output to match the size of the second # layer output. # TDOD: change ds layer0_output_ds = downsample.max_pool_2d(input=layer0.output, ds=(2, 2), ignore_border=True) # concatenate layer layer2_input = T.concatenate([layer1.output, layer0_output_ds], axis=1) # TODO: Construct the third convolutional pooling layer new_shape = (new_shape - filter_size + 1) // 2 layer2 = LeNetConvPoolLayer(rng, input=layer2_input, image_shape=(batch_size, nkerns[0] + nkerns[1], new_shape, new_shape), filter_shape=(nkerns[2], nkerns[0] + nkerns[1], filter_size, filter_size), poolsize=(2, 2)) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size, num_pixels) (i.e matrix of rasterized images). # This will generate a matrix of shape (batch_size, nkerns[2] * 1 * 1). 
layer3_input = layer2.output.flatten(2) # construct a fully-connected sigmoidal layer new_shape = (new_shape - filter_size + 1) // 2 layer3 = HiddenLayer(rng, input=layer3_input, n_in=nkerns[2] * new_shape * new_shape, n_out=500, activation=T.tanh) # classify the values of the fully-connected sigmoidal layer layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=10) # the cost we minimize during training is the NLL of the model cost = layer4.negative_log_likelihood(y) # create a function to compute the mistakes that are made by the model test_model = theano.function( [index], layer4.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) validate_model = theano.function( [index], layer4.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) # TODO: create a list of all model parameters to be fit by gradient descent params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i], grads[i]) pairs. updates = [(param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads)] train_model = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }) ############### # TRAIN MODEL # ############### print('... training') return train_nn(train_model, validate_model, test_model, n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
def test_CDNN(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512], batch_size=200, verbose=False, filter_size=5): """ Wrapper function for testing CNN in cascade with DNN """ rng = numpy.random.RandomState(23455) datasets = load_data() train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches //= batch_size n_valid_batches //= batch_size n_test_batches //= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ###################### # BUILD ACTUAL MODEL # ###################### print('... building the model') # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32) # to a 4D tensor, compatible with our LeNetConvPoolLayer layer0_input = x.reshape((batch_size, 3, 32, 32)) # TODO: Construct the first convolutional pooling layer layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size, 3, 32, 32), filter_shape=(nkerns[0], 3, filter_size, filter_size), poolsize=(2, 2)) # TODO: Construct the second convolutional pooling layer new_shape = (32 - filter_size + 1) // 2 layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, nkerns[0], new_shape, new_shape), filter_shape=(nkerns[1], nkerns[0], filter_size, filter_size), poolsize=(2, 2)) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size, num_pixels) (i.e matrix of rasterized images). layer2_input = layer1.output.flatten(2) # TODO: construct a fully-connected sigmoidal layer new_factors = (new_shape - filter_size + 1) // 2 layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * new_factors * new_factors, n_out=500, activation=T.tanh) layer3 = HiddenLayer(rng, input=layer2.output, n_in=500, n_out=500, activation=T.tanh) # TODO: classify the values of the fully-connected sigmoidal layer layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=10) # the cost we minimize during training is the NLL of the model cost = layer4.negative_log_likelihood(y) # create a function to compute the mistakes that are made by the model test_model = theano.function( [index], layer4.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) validate_model = theano.function( [index], layer4.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) # TODO: create a list of all model parameters to be fit by gradient descent params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i], grads[i]) pairs. 
updates = [(param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads)] train_model = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }) ############### # TRAIN MODEL # ############### print('... training') return train_nn(train_model, validate_model, test_model, n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
def test_lenet(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512], batch_size=200, filter_size=5, dnn_layers=1, n_hidden=500, gabor=False, lmbda=None, verbose=False): """ Wrapper function for testing LeNet on SVHN dataset :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type nkerns: list of ints :param nkerns: number of kernels on each layer :type batch_size: int :param batch_szie: number of examples in minibatch. :type verbose: boolean :param verbose: to print out epoch summary or not to. """ print test_lenet.__name__, nkerns, filter_size, gabor, lmbda rng = numpy.random.RandomState(23455) datasets = load_data() train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches //= batch_size n_valid_batches //= batch_size n_test_batches //= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ###################### # BUILD ACTUAL MODEL # ###################### print('... building the model') # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32) # to a 4D tensor, compatible with our LeNetConvPoolLayer layer0_input = x.reshape((batch_size, 3, 32, 32)) if gabor is True: # Generate Gabor filters filters = build_gabor(filter_size, nkerns[0], lmbda) # filters = numpy.array([filters[i][0] for i in range(len(filters))]) filters = numpy.array([filters[i] for i in range(len(filters))]) # print filters.shape filter_weights = numpy.tile(filters, (1, 3, 1)).reshape(nkerns[0], 3, filter_size, filter_size) layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size, 3, 32, 32), filter_shape=(nkerns[0], 3, filter_size, filter_size), poolsize=(2, 2), weights=filter_weights) print 'gabor filter weights are working' else: # TODO: Construct the first convolutional pooling layer layer0 = LeNetConvPoolLayer(rng, input=layer0_input, image_shape=(batch_size, 3, 32, 32), filter_shape=(nkerns[0], 3, filter_size, filter_size), poolsize=(2, 2)) # TODO: Construct the second convolutional pooling layer i_s_1 = (32 - filter_size + 1) / 2 layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, nkerns[0], i_s_1, i_s_1), filter_shape=(nkerns[1], nkerns[0], filter_size, filter_size), poolsize=(2, 2)) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size, num_pixels) (i.e matrix of rasterized images). 
layer2_input = layer1.output.flatten(2) # TODO: construct a fully-connected sigmoidal layer i_s_2 = (i_s_1 - filter_size + 1) / 2 if hasattr(n_hidden, '__iter__'): assert (len(n_hidden) == dnn_layers) else: n_hidden = (n_hidden, ) * dnn_layers DNN_Layers = [] for i in xrange(dnn_layers): h_input = layer2_input if i == 0 else DNN_Layers[i - 1].output h_in = nkerns[1] * i_s_2 * i_s_2 if i == 0 else n_hidden[i - 1] DNN_Layers.append( HiddenLayer(rng=rng, input=h_input, n_in=h_in, n_out=n_hidden[i], activation=T.tanh)) # layer2 = HiddenLayer( # rng, # input=layer2_input, # n_in=nkerns[1] * i_s_2 * i_s_2, # n_out=500, # activation=T.tanh # ) # TODO: classify the values of the fully-connected sigmoidal layer LR_Layer = LogisticRegression(input=DNN_Layers[-1].output, n_in=n_hidden[i], n_out=10) # the cost we minimize during training is the NLL of the model cost = LR_Layer.negative_log_likelihood(y) # create a function to compute the mistakes that are made by the model test_model = theano.function( [index], LR_Layer.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) validate_model = theano.function( [index], LR_Layer.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) # TODO: create a list of all model parameters to be fit by gradient descent params = LR_Layer.params for layer in DNN_Layers: params += layer.params if gabor is True: print 'gabor params is workings' params += layer1.params else: params += layer1.params + layer0.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i], grads[i]) pairs. updates = [(param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads)] train_model = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }) ############### # TRAIN MODEL # ############### print('... training') train_nn(train_model, validate_model, test_model, n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
def test_dropconnect3(learning_rate=0.1, n_epochs=1000, nkerns=[16, 64, 20],
                      batch_size=20, verbose=True,
                      fileName='predictionsDropConnect3_Cifar',
                      activation=tanh, fullyconnected=300, p=0.5):
    """
    Wrapper function for testing a LeNet-style network with a DropConnect
    fully-connected layer on the CIFAR-10 dataset.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data_cifar()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    learning_rate = theano.shared(learning_rate)

    # pseudo-boolean flag for switching between training and prediction
    #testing = T.lscalar('testing')
    testing = T.iscalar('testing')
    testValue = testing
    getTestValue = theano.function([testing], testValue)

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # Construct the first convolutional pooling layer
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 3, 32, 32),
        filter_shape=(nkerns[0], 3, 5, 5),
        poolsize=(2, 2),
        activation=tanh
    )

    # Construct the second convolutional pooling layer
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 14, 14),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2),
        activation=tanh
    )

    # Construct the third convolutional pooling layer
    layer2 = LeNetConvPoolLayer(
        rng,
        input=layer1.output,
        image_shape=(batch_size, nkerns[1], 5, 5),
        filter_shape=(nkerns[2], nkerns[1], 2, 2),
        poolsize=(2, 2),
        activation=tanh
    )

    # the DropConnect layer being fully-connected, it operates on 2D matrices
    # of shape (batch_size, num_pixels) (i.e. matrix of rasterized images).
    layer3_input = layer2.output.flatten(2)

    layer3 = DropConnect(
        rng,
        input=layer3_input,
        n_in=nkerns[2] * 2 * 2,
        n_out=fullyconnected,
        testing=testing,
        activation=activation,
        p=p
    )

    # classify the values of the fully-connected layer
    layer4 = LogisticRegression(
        input=layer3.output,
        n_in=fullyconnected,
        n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer4.negative_log_likelihood(y)

    print("Model building complete")

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size],
            testing: getTestValue(1)
        },
        on_unused_input='ignore'
    )

    getPredictedValue = theano.function(
        [index],
        layer4.predictedValue(),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size],
            testing: getTestValue(1)
        },
        on_unused_input='ignore'
    )

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size],
            testing: getTestValue(1)
        },
        on_unused_input='ignore'
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD with
    # momentum.  The DropConnect weight and bias updates are multiplied by the
    # same masks used in the forward pass.  A simpler alternative without
    # momentum would mask the gradients directly:
    # updates = [
    #     (param_i, param_i - learning_rate * layer3.maskW.get_value() * grad_i) if param_i.name == 'WDrop'
    #     else (param_i, param_i - learning_rate * layer3.maskb.get_value() * grad_i) if param_i.name == 'bDrop'
    #     else (param_i, param_i - learning_rate * grad_i)
    #     for param_i, grad_i in zip(params, grads)
    # ]
    updates = []
    momentum = 0.9
    for param in params:
        param_update = theano.shared(param.get_value() * 0.,
                                     broadcastable=param.broadcastable)
        if param.name == 'WDrop':
            updates.append((param, param - learning_rate.get_value().item()
                            * layer3.maskW.get_value() * param_update))
        elif param.name == 'bDrop':
            updates.append((param, param - learning_rate.get_value().item()
                            * layer3.maskb.get_value() * param_update))
        else:
            updates.append((param, param - learning_rate.get_value().item()
                            * param_update))
        updates.append((param_update,
                        momentum * param_update
                        + (1. - momentum) * T.grad(cost, param)))

    print("Compiling the train model function")

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size],
            testing: getTestValue(0)
        },
        on_unused_input='ignore',
        allow_input_downcast=True
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    predictions = train_nn(train_model, validate_model, test_model,
                           getPredictedValue, n_train_batches,
                           n_valid_batches, n_test_batches, n_epochs,
                           learning_rate, verbose)

    f = open(fileName, 'wb')
    cPickle.dump(predictions, f, protocol=cPickle.HIGHEST_PROTOCOL)
    f.close()
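# Editor's sketch: the DropConnect class used above is defined elsewhere in the
# codebase.  Its assumed behaviour is that, during training, each individual
# weight and bias of the fully-connected layer is kept with probability p and
# zeroed otherwise, while at test time the unmasked parameters are used
# (a mean-field style approximation).  A minimal numpy illustration of that
# assumed forward pass; all names here are illustrative only:
import numpy as np

def dropconnect_forward(x, W, b, p=0.5, training=True, rng=np.random):
    """Forward pass of a DropConnect layer: mask W and b element-wise when training."""
    if training:
        maskW = rng.binomial(1, p, size=W.shape)  # keep each weight with probability p
        maskb = rng.binomial(1, p, size=b.shape)
        return np.tanh(np.dot(x, W * maskW) + b * maskb)
    # at test time, use the full (unmasked) parameters
    return np.tanh(np.dot(x, W) + b)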
def MY_lenet(learning_rate=0.1, n_epochs=200, nkerns=[20, 50], batch_size=500,
             L1_reg=0.00, L2_reg=0.0001):

    rng = numpy.random.RandomState(23455)

    ds_rate = None
    datasets = load_data(ds_rate=ds_rate, theano_shared=False)

    train_set_x, train_set_y = datasets[0]

    train_size = train_set_x.shape
    n_train = train_size[0]

    '''
    print '... Translating images'
    train_set_x_tran = np.empty(train_size)
    for i in range(n_train):
        img = (np.reshape(train_set_x[i], (3, 32, 32))).transpose(1, 2, 0)
        img_tran = translate_image(img)
        train_set_x_tran[i] = np.reshape(img_tran.transpose(2, 0, 1), (3 * 32 * 32))

    print '... Rotating images'
    train_set_x_rota = np.empty(train_size)
    for i in range(n_train):
        img = (np.reshape(train_set_x[i], (3, 32, 32))).transpose(1, 2, 0)
        img_tran = rotate_image(img)
        train_set_x_rota[i] = np.reshape(img_tran.transpose(2, 0, 1), (3 * 32 * 32))
    '''

    print '... Flipping images'
    train_set_x_flip = np.empty(train_size)
    for i in range(n_train):
        img = (np.reshape(train_set_x[i], (3, 32, 32))).transpose(1, 2, 0)
        img_tran = flip_image(img)
        train_set_x_flip[i] = np.reshape(img_tran.transpose(2, 0, 1), (3 * 32 * 32))

    '''
    print '... Adding noise to images'
    train_set_x_nois = np.empty(train_size)
    for i in range(n_train):
        img = (np.reshape(train_set_x[i], (3, 32, 32))).transpose(1, 2, 0)
        img_tran = noise_injection(img)
        train_set_x_nois[i] = np.reshape(img_tran.transpose(2, 0, 1), (3 * 32 * 32))
    '''

    # append the flipped (and, if enabled, the other augmented) images to the
    # training set, duplicating the labels accordingly
    train_set_x = np.concatenate(
        (
            train_set_x,
            #train_set_x_tran,
            #train_set_x_rota,
            train_set_x_flip),
        axis=0)
    train_set_y = np.concatenate(
        (
            train_set_y,
            #train_set_y,
            #train_set_y,
            train_set_y),
        axis=0)
    datasets[0] = [train_set_x, train_set_y]

    train_set_x, train_set_y = shared_dataset(datasets[0])
    valid_set_x, valid_set_y = shared_dataset(datasets[1])
    test_set_x, test_set_y = shared_dataset(datasets[2])

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    # pseudo boolean for switching between training and prediction
    training_enabled = T.iscalar('training_enabled')

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (32-3+1, 32-3+1) = (30, 30)
    # maxpooling reduces this further to (30/2, 30/2) = (15, 15)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 15, 15)
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 3, 32, 32),
                                filter_shape=(nkerns[0], 3, 3, 3),
                                poolsize=(2, 2))

    #print 'layer0.output.shape ='
    #print layer0.output.shape.eval({x: np.random.rand(2,2).astype(dtype=theano.config.floatX)})

    layerbn = BatchNormalization(input_shape=(batch_size, nkerns[0], 15, 15),
                                 mode=1,
                                 momentum=0.9)
    layerbn_output = layerbn.get_result(layer0.output)

    #print 'layerbn_output.shape ='
    #print layerbn_output.shape.eval({x: np.random.rand(2,2).astype(dtype=theano.config.floatX)})

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (15-3+1, 15-3+1) = (13, 13)
    # maxpooling reduces this further to (13/2, 13/2) = (6, 6)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 6, 6)
    layer1 = LeNetConvPoolLayer(rng,
                                input=layerbn_output,
                                image_shape=(batch_size, nkerns[0], 15, 15),
                                filter_shape=(nkerns[1], nkerns[0], 3, 3),
                                poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 6 * 6).
    layer2_input = layer1.output.flatten(2)

    # construct a stack of fully-connected dropout layers with ReLU activation
    layer2 = DropoutHiddenLayer(rng,
                                is_train=training_enabled,
                                input=layer2_input,
                                n_in=nkerns[1] * 6 * 6,
                                n_out=4096,
                                activation=T.nnet.relu)

    layer3 = DropoutHiddenLayer(rng,
                                is_train=training_enabled,
                                input=layer2.output,
                                n_in=4096,
                                n_out=2048,
                                activation=T.nnet.relu)

    layer4 = DropoutHiddenLayer(rng,
                                is_train=training_enabled,
                                input=layer3.output,
                                n_in=2048,
                                n_out=1024,
                                activation=T.nnet.relu)

    layer5 = DropoutHiddenLayer(rng,
                                is_train=training_enabled,
                                input=layer4.output,
                                n_in=1024,
                                n_out=512,
                                activation=T.nnet.relu)

    # classify the values of the last fully-connected layer
    layer6 = LogisticRegression(input=layer5.output, n_in=512, n_out=10)

    # L1 norm; one regularization option is to enforce L1 norm to be small
    L1 = (abs(layer2.W).sum() + abs(layer3.W).sum() + abs(layer4.W).sum()
          + abs(layer5.W).sum() + abs(layer6.W).sum())

    # square of L2 norm; one regularization option is to enforce
    # square of L2 norm to be small
    L2_sqr = ((layer2.W ** 2).sum() + (layer3.W ** 2).sum()
              + (layer4.W ** 2).sum() + (layer5.W ** 2).sum()
              + (layer6.W ** 2).sum())

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (layer6.negative_log_likelihood(y) + L1_reg * L1 + L2_reg * L2_sqr)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer6.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](0)
        })

    validate_model = theano.function(
        [index],
        layer6.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](0)
        })

    # create a list of all model parameters to be fit by gradient descent
    params = (layer6.params + layer5.params + layer4.params + layer3.params
              + layer2.params + layer1.params + layer0.params)

    '''
    # plain SGD alternative:
    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]
    '''

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs, using SGD with momentum
    momentum = theano.shared(numpy.cast[theano.config.floatX](0.5),
                             name='momentum')
    updates = []
    for param in params:
        param_update = theano.shared(
            param.get_value() * numpy.cast[theano.config.floatX](0.))
        updates.append((param, param - learning_rate * param_update))
        updates.append((param_update,
                        momentum * param_update
                        + (numpy.cast[theano.config.floatX](1.) - momentum)
                        * T.grad(cost, param)))

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](1)
        })
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches, n_epochs,
             verbose=True)
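# Editor's note: flip_image used in MY_lenet is defined elsewhere.  It is
# assumed here to mirror an (H, W, C) image left-to-right, which is a
# label-preserving augmentation for CIFAR-style images.  A minimal numpy
# sketch under that assumption (the function name is illustrative only):
import numpy as np

def flip_image_sketch(img):
    """Mirror an (H, W, C) image horizontally; the class label is unchanged."""
    return img[:, ::-1, :]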
def test_emotionTraining(learning_rate=0.1, n_epochs=1000,
                         nkerns=[16, 512, 20], batch_size=200, verbose=True):
    """
    Wrapper function for training a Multi-Stage ConvNet on the facial-emotion
    dataset (48x48 grayscale images, 7 classes).

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.
    """

    rng = numpy.random.RandomState(23455)

    datasets = loadData()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################

    # Make learning rate a theano shared variable
    learning_rate = theano.shared(learning_rate)

    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 1 * 48 * 48)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, 48, 48))

    # Construct the first convolutional pooling layer
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 48, 48),
        filter_shape=(nkerns[0], 1, 3, 3),
        poolsize=(2, 2)
    )

    # Construct the second convolutional pooling layer
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 23, 23),
        filter_shape=(nkerns[1], nkerns[0], 4, 4),
        poolsize=(2, 2)
    )

    # Construct the third convolutional pooling layer
    layer2 = LeNetConvPoolLayer(
        rng,
        input=layer1.output,
        image_shape=(batch_size, nkerns[1], 10, 10),
        filter_shape=(nkerns[2], nkerns[1], 3, 3),
        poolsize=(2, 2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[2] * 4 * 4).
    layer3_input = layer2.output.flatten(2)

    # construct a fully-connected tanh layer
    layer3 = HiddenLayer(
        rng,
        input=layer3_input,
        n_in=nkerns[2] * 4 * 4,
        n_out=batch_size,
        activation=T.tanh
    )

    # classify the values of the fully-connected layer into the 7 emotion classes
    layer4 = LogisticRegression(input=layer3.output,
                                n_in=batch_size,
                                n_out=7)

    # the cost we minimize during training is the NLL of the model
    cost = layer4.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer0.params + layer1.params + layer2.params + layer3.params + layer4.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to manually
    # create an update rule for each model parameter.  We thus create the
    # updates list by automatically looping over all (params[i], grads[i])
    # pairs.
    updates = [
        (param_i, param_i - learning_rate.get_value().item() * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    getPofYGivenX = theano.function(
        [index],
        layer4.pOfYGivenX(),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        },
        on_unused_input='ignore'
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches, n_epochs,
             learning_rate, verbose)

    print('Training the model complete')

    # save the learned weights and biases of every layer
    for name, layer in [('layer0', layer0), ('layer1', layer1),
                        ('layer2', layer2), ('layer3', layer3),
                        ('layer4', layer4)]:
        f1 = open(name + '.W', 'wb')
        cPickle.dump(layer.W.get_value(), f1, protocol=cPickle.HIGHEST_PROTOCOL)
        f1.close()
        f1 = open(name + '.b', 'wb')
        cPickle.dump(layer.b.get_value(), f1, protocol=cPickle.HIGHEST_PROTOCOL)
        f1.close()

    print("Saving the model complete")

    predictedList = getPofYGivenX(1)
    print("List of probabilities predicted = " + str(predictedList))
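# Editor's sketch: how the per-layer weights pickled above could be reloaded
# later, e.g. to warm-start another network.  The file names follow the
# 'layerN.W' / 'layerN.b' pattern used in test_emotionTraining; the set_value
# calls assume the target layer has the same parameter shapes as the saved one.
import cPickle

def load_layer_params(layer, name):
    """Restore W and b of `layer` from the files '<name>.W' and '<name>.b'."""
    with open(name + '.W', 'rb') as f:
        layer.W.set_value(cPickle.load(f))
    with open(name + '.b', 'rb') as f:
        layer.b.set_value(cPickle.load(f))

# usage (illustrative): load_layer_params(layer0, 'layer0')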
def test_lenet(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512],
               batch_size=200, filter_size=5, dnn_layers=1, n_hidden=500,
               gabor=False, lmbda=None, verbose=False):
    """
    Wrapper function for testing LeNet on SVHN dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples in minibatch.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.
    """
    print test_lenet.__name__, nkerns, filter_size, gabor, lmbda

    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    if gabor is True:
        # Generate Gabor filters and use them as fixed weights of the first
        # convolutional layer
        filters = build_gabor(filter_size, nkerns[0], lmbda)
        # filters = numpy.array([filters[i][0] for i in range(len(filters))])
        filters = numpy.array([filters[i] for i in range(len(filters))])
        # print filters.shape
        filter_weights = numpy.tile(filters, (1, 3, 1)).reshape(
            nkerns[0], 3, filter_size, filter_size)

        layer0 = LeNetConvPoolLayer(
            rng,
            input=layer0_input,
            image_shape=(batch_size, 3, 32, 32),
            filter_shape=(nkerns[0], 3, filter_size, filter_size),
            poolsize=(2, 2),
            weights=filter_weights
        )
        print 'gabor filter weights are working'
    else:
        # Construct the first convolutional pooling layer
        layer0 = LeNetConvPoolLayer(
            rng,
            input=layer0_input,
            image_shape=(batch_size, 3, 32, 32),
            filter_shape=(nkerns[0], 3, filter_size, filter_size),
            poolsize=(2, 2)
        )

    # Construct the second convolutional pooling layer
    i_s_1 = (32 - filter_size + 1) / 2
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], i_s_1, i_s_1),
        filter_shape=(nkerns[1], nkerns[0], filter_size, filter_size),
        poolsize=(2, 2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. matrix of rasterized images).
    layer2_input = layer1.output.flatten(2)

    # construct the stack of fully-connected tanh layers
    i_s_2 = (i_s_1 - filter_size + 1) / 2
    if hasattr(n_hidden, '__iter__'):
        assert len(n_hidden) == dnn_layers
    else:
        n_hidden = (n_hidden,) * dnn_layers

    DNN_Layers = []
    for i in xrange(dnn_layers):
        h_input = layer2_input if i == 0 else DNN_Layers[i - 1].output
        h_in = nkerns[1] * i_s_2 * i_s_2 if i == 0 else n_hidden[i - 1]
        DNN_Layers.append(
            HiddenLayer(
                rng=rng,
                input=h_input,
                n_in=h_in,
                n_out=n_hidden[i],
                activation=T.tanh
            ))

    # classify the values of the last fully-connected layer
    LR_Layer = LogisticRegression(
        input=DNN_Layers[-1].output,
        n_in=n_hidden[-1],
        n_out=10
    )

    # the cost we minimize during training is the NLL of the model
    cost = LR_Layer.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        LR_Layer.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        LR_Layer.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent;
    # when Gabor filters are used, the first-layer weights stay fixed
    params = LR_Layer.params
    for layer in DNN_Layers:
        params += layer.params
    if gabor is True:
        print 'gabor params are working'
        params += layer1.params
    else:
        params += layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to manually
    # create an update rule for each model parameter.  We thus create the
    # updates list by automatically looping over all (params[i], grads[i])
    # pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches, n_epochs,
             verbose)
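# Editor's sketch: build_gabor used in test_lenet is defined elsewhere.  Based
# on how it is called, it is assumed to return nkerns[0] real-valued Gabor
# kernels of side filter_size, one per orientation, with wavelength lmbda.
# A minimal numpy version under that assumption; sigma and gamma are
# illustrative defaults, not values taken from the original code:
import numpy as np

def build_gabor_sketch(filter_size, n_filters, lmbda, sigma=2.0, gamma=0.5):
    """Return a list of n_filters Gabor kernels with evenly spaced orientations."""
    coords = np.arange(filter_size) - (filter_size - 1) / 2.0
    ys, xs = np.meshgrid(coords, coords, indexing='ij')
    filters = []
    for k in range(n_filters):
        theta = np.pi * k / n_filters          # orientation of this filter
        x_t = xs * np.cos(theta) + ys * np.sin(theta)
        y_t = -xs * np.sin(theta) + ys * np.cos(theta)
        # standard real Gabor: Gaussian envelope times a cosine carrier
        g = np.exp(-(x_t ** 2 + gamma ** 2 * y_t ** 2) / (2 * sigma ** 2)) \
            * np.cos(2 * np.pi * x_t / lmbda)
        filters.append(g.astype('float32'))
    return filters

# usage (illustrative): filters = build_gabor_sketch(5, 16, lmbda=3.0)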