def test_SdA(
    finetune_lr=0.1,
    pretraining_epochs=15,
    pretrain_lr=0.001,
    training_epochs=1000,
    dataset="../data/mnist.pkl.gz",
    batch_size=1,
):
    """
    Pre-train a stacked denoising autoencoder layer by layer, then
    fine-tune it with early stopping, printing progress along the way.

    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage
                        (factor for the stochastic gradient)

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epochs of pre-training per layer

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type training_epochs: int
    :param training_epochs: maximal number of fine-tuning epochs

    :type dataset: string
    :param dataset: currently ignored; the data is read from the
                    hard-coded ``.mat`` paths below

    :type batch_size: int
    :param batch_size: number of examples per minibatch
    """
    # NOTE(review): the third load_mat argument (5000) presumably limits the
    # number of examples that are loaded -- confirm against load_mat.
    datasets = load_mat(
        "/home/ubuntu/var/train_32x32.mat",
        "/home/ubuntu/var/test_32x32.mat",
        5000,
    )
    train_set_x, _ = datasets[0]

    # number of whole minibatches in the training set (explicit floor
    # division so the result stays an int usable by xrange)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(89677)

    print("... building the model")
    # construct the stacked denoising autoencoder class
    sda = SdA(
        numpy_rng=numpy_rng,
        n_ins=96 * 32,
        hidden_layers_sizes=[500],
        n_outs=10,
    )
    # One corruption level per hidden layer: this list is indexed by layer in
    # the pre-training loop, so keep it in sync with hidden_layers_sizes.
    corruption_levels = [0.3]

    #########################
    # PRETRAINING THE MODEL #
    #########################
    print("... getting the pretraining functions")
    pretraining_fns = sda.pretraining_functions(
        train_set_x=train_set_x, batch_size=batch_size
    )

    print("... pre-training the model")
    start_time = time.clock()
    # Pre-train layer-wise
    for layer in xrange(sda.n_layers):
        for epoch in xrange(pretraining_epochs):
            costs = [
                pretraining_fns[layer](
                    index=batch_index,
                    corruption=corruption_levels[layer],
                    lr=pretrain_lr,
                )
                for batch_index in xrange(n_train_batches)
            ]
            # The double space reproduces the separator that the former
            # two-part print statement emitted.
            print(
                "Pre-training layer %i, epoch %d, cost  %s"
                % (layer, epoch, numpy.mean(costs))
            )

    end_time = time.clock()

    print >> sys.stderr, (
        "The pretraining code for file "
        + os.path.split(__file__)[1]
        + " ran for %.2fm" % ((end_time - start_time) / 60.0)
    )

    ########################
    # FINETUNING THE MODEL #
    ########################
    # get the training, validation and testing function for the model
    print("... getting the finetuning functions")
    train_fn, validate_model, test_model = sda.build_finetune_functions(
        datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr
    )

    print("... finetunning the model")
    # early-stopping parameters
    patience = 10 * n_train_batches  # look at this many minibatches regardless
    patience_increase = 2.0  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    # validate after this many minibatches; here that is once per epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    test_score = 0.0
    start_time = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            train_fn(minibatch_index)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                this_validation_loss = numpy.mean(validate_model())
                print(
                    "epoch %i, minibatch %i/%i, validation error %f %%"
                    % (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.0,
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # extend patience only if the improvement is significant
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iteration * patience_increase)

                    best_validation_loss = this_validation_loss

                    # score the new best model on the test set
                    test_score = numpy.mean(test_model())
                    print(
                        (" epoch %i, minibatch %i/%i, test error of " "best model %f %%")
                        % (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_score * 100.0,
                        )
                    )

            if patience <= iteration:
                done_looping = True
                break

    end_time = time.clock()
    print(
        ("Optimization complete with best validation score of %f %%," "with test performance %f %%")
        % (best_validation_loss * 100.0, test_score * 100.0)
    )
    print >> sys.stderr, (
        "The training code for file "
        + os.path.split(__file__)[1]
        + " ran for %.2fm" % ((end_time - start_time) / 60.0)
    )
def test_dA(learning_rate=0.1, training_epochs=15,
            dataset='../data/mnist.pkl.gz',
            batch_size=20, output_folder='dA_plots'):
    """
    Train one denoising autoencoder per corruption rate and save an image
    of the learned filters for each rate into ``output_folder``.

    The sweep covers 0% (no corruption) up to 99% corruption.  The working
    directory is changed to ``output_folder`` while the sweep runs and is
    restored afterwards, even if training fails.

    :type learning_rate: float
    :param learning_rate: learning rate used for training the denoising
                          autoencoder

    :type training_epochs: int
    :param training_epochs: number of epochs used for training

    :type dataset: string
    :param dataset: currently ignored; the data is read from the
                    hard-coded ``.mat`` paths below

    :type batch_size: int
    :param batch_size: number of examples per minibatch

    :type output_folder: string
    :param output_folder: directory (created if missing) that receives the
                          ``filters_corruption.color.<rate>.png`` files
    """
    # NOTE(review): the third load_mat argument (5000) presumably limits the
    # number of examples that are loaded -- confirm against load_mat.
    datasets = load_mat("/home/ubuntu/var/train_32x32.mat",
                        "/home/ubuntu/var/test_32x32.mat", 5000)
    train_set_x, _ = datasets[0]

    # number of whole minibatches in the training set (explicit floor
    # division so the result stays an int usable by xrange)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')      # the data is presented as rasterized images

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)

    # Remember where we started: a relative '../' would land in the wrong
    # place when output_folder is nested (e.g. 'plots/dA').
    original_cwd = os.getcwd()
    os.chdir(output_folder)
    try:
        for rate in [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 99]:
            print("Rate: %d" % rate)

            ##########################################
            # BUILDING THE MODEL FOR THIS CORRUPTION #
            ##########################################
            rng = numpy.random.RandomState(123)
            theano_rng = RandomStreams(rng.randint(2 ** 30))

            da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
                    n_visible=96 * 32, n_hidden=100)

            cost, updates = da.get_cost_updates(
                corruption_level=rate / 100.0,
                learning_rate=learning_rate)

            train_da = theano.function(
                [index], cost, updates=updates,
                givens={x: train_set_x[index * batch_size:
                                       (index + 1) * batch_size]})

            start_time = time.clock()

            ############
            # TRAINING #
            ############
            for epoch in xrange(training_epochs):
                costs = [train_da(batch_index)
                         for batch_index in xrange(n_train_batches)]
                # The double space reproduces the separator that the former
                # two-argument print statement emitted.
                print('Training epoch %d, cost  %s'
                      % (epoch, numpy.mean(costs)))

            end_time = time.clock()
            training_time = end_time - start_time

            # Interpolate the rate explicitly; the message used to contain a
            # bare '%' and never said which rate had been timed.
            print >> sys.stderr, (('The %d%% corruption code for file ' % rate) +
                                  os.path.split(__file__)[1] +
                                  ' ran for %.2fm' % (training_time / 60.))

            # Each tile row holds 96 values that are treated as 32 pixels
            # with 3 interleaved channel values, so every third column
            # belongs to the same channel.
            # assumes tile_raster_images returns a 2-D array-like of shape
            # (32*10 + 27, 3 * (32*10 + 9)) for these arguments -- TODO confirm
            tiled = numpy.asarray(tile_raster_images(
                X=da.W.get_value(borrow=True).T,
                img_shape=(32, 96), tile_shape=(10, 10),
                tile_spacing=(3, 3)))
            planes = [
                PIL.Image.fromarray(numpy.ascontiguousarray(tiled[:, offset::3]))
                for offset in range(3)
            ]

            # NOTE(review): the planes are merged as (offset 0, offset 2,
            # offset 1), i.e. the second and third interleaved values are
            # swapped relative to a plain R,G,B layout.  This ordering is
            # kept unchanged -- confirm it against load_mat's channel layout.
            image = PIL.Image.merge("RGB", (planes[0], planes[2], planes[1]))
            image.save('filters_corruption.color.%d.png' % rate)
    finally:
        os.chdir(original_cwd)
def test_DBN(finetune_lr=0.1, pretraining_epochs=100,
             pretrain_lr=0.01, k=1, training_epochs=1000,
             dataset='../data/mnist.pkl.gz', batch_size=10):
    """
    Pre-train a Deep Belief Network layer by layer, then fine-tune it with
    early stopping, printing progress along the way.

    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epochs of pre-training per layer

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type k: int
    :param k: number of Gibbs steps in CD/PCD

    :type training_epochs: int
    :param training_epochs: maximal number of fine-tuning epochs

    :type dataset: string
    :param dataset: currently ignored; the data is read from the
                    hard-coded ``.mat`` paths below

    :type batch_size: int
    :param batch_size: the size of a minibatch
    """
    datasets = load_mat("/home/ubuntu/var/train_32x32.mat",
                        "/home/ubuntu/var/test_32x32.mat")
    train_set_x, _ = datasets[0]

    # number of whole minibatches in the training set (explicit floor
    # division so the result stays an int usable by xrange)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)

    print('... building the model')
    # construct the Deep Belief Network
    dbn = DBN(numpy_rng=numpy_rng, n_ins=96 * 32,
              hidden_layers_sizes=[1000, 1000, 1000],
              n_outs=10)

    #########################
    # PRETRAINING THE MODEL #
    #########################
    print('... getting the pretraining functions')
    pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size,
                                                k=k)

    print('... pre-training the model')
    start_time = time.clock()
    # Pre-train layer-wise
    for layer in xrange(dbn.n_layers):
        for epoch in xrange(pretraining_epochs):
            costs = [pretraining_fns[layer](index=batch_index, lr=pretrain_lr)
                     for batch_index in xrange(n_train_batches)]
            # The double space reproduces the separator that the former
            # two-part print statement emitted.
            print('Pre-training layer %i, epoch %d, cost  %s'
                  % (layer, epoch, numpy.mean(costs)))

    end_time = time.clock()
    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    ########################
    # FINETUNING THE MODEL #
    ########################
    # get the training, validation and testing function for the model
    print('... getting the finetuning functions')
    train_fn, validate_model, test_model = dbn.build_finetune_functions(
        datasets=datasets, batch_size=batch_size,
        learning_rate=finetune_lr)

    print('... finetunning the model')
    # early-stopping parameters
    patience = 4 * n_train_batches  # look at this many minibatches regardless
    patience_increase = 2.    # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # validate after this many minibatches; here that is once per epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            train_fn(minibatch_index)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                this_validation_loss = numpy.mean(validate_model())
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # extend patience only if the improvement is significant
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience, iteration * patience_increase)

                    best_validation_loss = this_validation_loss

                    # score the new best model on the test set
                    test_score = numpy.mean(test_model())
                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%,'
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
                    dataset='../data/mnist.pkl.gz',
                    nkerns=[20, 50], batch_size=500):
    """
    Train a LeNet-style convolutional network with minibatch SGD and early
    stopping, printing validation and test error along the way.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: currently ignored; the data is read from the
                    hard-coded ``.mat`` paths below

    :type nkerns: list of ints
    :param nkerns: number of kernels on each of the two convolutional
                   layers (only indexed, never modified)

    :type batch_size: int
    :param batch_size: number of examples per minibatch
    """
    rng = numpy.random.RandomState(23455)

    datasets = load_mat("/home/ubuntu/var/train_32x32.mat",
                        "/home/ubuntu/var/test_32x32.mat")

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # number of whole minibatches per split (explicit floor division so the
    # results stay ints usable by xrange)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of
                         # [int] labels

    ishape = (96, 32)    # each rasterized row is reshaped to 96 x 32
    filter_hw = (5, 5)   # spatial size of every convolution filter
    poolsize = (2, 2)    # max-pooling factor of every conv/pool layer

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # NOTE(review): the feature-map sizes below assume LeNetConvPoolLayer
    # performs a 'valid' convolution followed by non-overlapping max-pooling
    # that drops partial windows -- confirm against its definition.

    # Reshape matrix of rasterized images of shape (batch_size, 96 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1) + ishape)

    # First convolutional pooling layer:
    # filtering reduces the image size to (96-5+1, 32-5+1) = (92, 28)
    # maxpooling reduces this further to (92/2, 28/2) = (46, 14)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                                image_shape=(batch_size, 1) + ishape,
                                filter_shape=(nkerns[0], 1) + filter_hw,
                                poolsize=poolsize)
    layer0_out_hw = _conv_pool_output_shape(ishape, filter_hw, poolsize)

    # Second convolutional pooling layer, fed with layer0's real output
    # size (the former hard-coded 12 x 12 only applied to 28 x 28 inputs):
    # filtering reduces the maps to (46-5+1, 14-5+1) = (42, 10)
    # maxpooling reduces this further to (42/2, 10/2) = (21, 5)
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                                image_shape=(batch_size, nkerns[0]) + layer0_out_hw,
                                filter_shape=(nkerns[1], nkerns[0]) + filter_hw,
                                poolsize=poolsize)
    layer1_out_hw = _conv_pool_output_shape(layer0_out_hw, filter_hw, poolsize)

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_features); flatten all but the batch axis.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected tanh layer whose input width matches the
    # flattened layer1 output
    layer2 = HiddenLayer(rng, input=layer2_input,
                         n_in=nkerns[1] * layer1_out_hw[0] * layer1_out_hw[1],
                         n_out=500, activation=T.tanh)

    # classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function([index], layer3.errors(y),
             givens={
                x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    validate_model = theano.function([index], layer3.errors(y),
            givens={
                x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # plain SGD: one (parameter, updated value) pair per model parameter
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function([index], cost, updates=updates,
          givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # look at this many minibatches regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # validate after this many minibatches; at most once per epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                # The double space reproduces the separator that the former
                # two-argument print statement emitted.
                print('training @ iter =  %d' % iteration)
            train_model(minibatch_index)

            if (iteration + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # extend patience only if the improvement is significant
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience, iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # score the new best model on the test set
                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print((' epoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i,'
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))


def _conv_pool_output_shape(image_hw, filter_hw, pool_hw):
    """Return the (rows, cols) left after a 'valid' convolution followed by
    non-overlapping pooling that discards partial windows."""
    return tuple((image - kernel + 1) // pool
                 for image, kernel, pool in zip(image_hw, filter_hw, pool_hw))