def test_DBN(finetune_lr=0.1, pretraining_epochs=100, pretrain_lr=0.01, k=1, training_epochs=1000, dataset='../data/mnist.pkl.gz', batch_size=10): """ Demonstrates how to train and test a Deep Belief Network. This is demonstrated on MNIST. :type learning_rate: float :param learning_rate: learning rate used in the finetune stage :type pretraining_epochs: int :param pretraining_epochs: number of epoch to do pretraining :type pretrain_lr: float :param pretrain_lr: learning rate to be used during pre-training :type k: int :param k: number of Gibbs steps in CD/PCD :type training_epochs: int :param training_epochs: maximal number of iterations ot run the optimizer :type dataset: string :param dataset: path the the pickled dataset :type batch_size: int :param batch_size: the size of a minibatch """ datasets = load_data(dataset) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size # numpy random generator numpy_rng = numpy.random.RandomState(123) print '... building the model' # construct the Deep Belief Network dbn = DBN(numpy_rng=numpy_rng, n_ins=28 * 28, hidden_layers_sizes=[1000, 1000, 1000], n_outs=10) ######################### # PRETRAINING THE MODEL # ######################### print '... getting the pretraining functions' pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size, k=k) print '... pre-training the model' start_time = time.clock() ## Pre-train layer-wise for i in xrange(dbn.n_layers): # go through pretraining epochs for epoch in xrange(pretraining_epochs): # go through the training set c = [] for batch_index in xrange(n_train_batches): c.append(pretraining_fns[i](index=batch_index, lr=pretrain_lr)) print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), print numpy.mean(c) end_time = time.clock() print >> sys.stderr, ('The pretraining code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) ######################## # FINETUNING THE MODEL # ######################## # get the training, validation and testing function for the model print '... getting the finetuning functions' train_fn, validate_model, test_model = dbn.build_finetune_functions( datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr) print '... finetunning the model' # early-stopping parameters patience = 4 * n_train_batches # look as this many examples regardless patience_increase = 2. # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_params = None best_validation_loss = numpy.inf test_score = 0. start_time = time.clock() done_looping = False epoch = 0 while (epoch < training_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): minibatch_avg_cost = train_fn(minibatch_index) iter = epoch * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: validation_losses = validate_model() this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % \ (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if (this_validation_loss < best_validation_loss * improvement_threshold): patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter # test it on the test set test_losses = test_model() test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) if patience <= iter: done_looping = True break end_time = time.clock() print(('Optimization complete with best validation score of %f %%,' 'with test performance %f %%') % (best_validation_loss * 100., test_score * 100.)) print >> sys.stderr, ('The fine tuning code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
def test_rbm(learning_rate=0.1, training_epochs=15, dataset='../data/mnist.pkl.gz', batch_size=20, n_chains=20, n_samples=10, output_folder='rbm_plots', n_hidden=500): """ Demonstrate how to train and afterwards sample from it using Theano. This is demonstrated on MNIST. :param learning_rate: learning rate used for training the RBM :param training_epochs: number of epochs used for training :param dataset: path the the pickled dataset :param batch_size: size of a batch used to train the RBM :param n_chains: number of parallel Gibbs chains to be used for sampling :param n_samples: number of samples to plot for each chain """ datasets = load_data(dataset) train_set_x, train_set_y = datasets[0] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images rng = numpy.random.RandomState(123) theano_rng = RandomStreams(rng.randint(2**30)) # initialize storage for the persistent chain (state = hidden # layer of chain) persistent_chain = theano.shared(numpy.zeros((batch_size, n_hidden), dtype=theano.config.floatX), borrow=True) # construct the RBM class rbm = RBM(input=x, n_visible=28 * 28, n_hidden=n_hidden, numpy_rng=rng, theano_rng=theano_rng) # get the cost and the gradient corresponding to one step of CD-15 cost, updates = rbm.get_cost_updates(lr=learning_rate, persistent=persistent_chain, k=15) ################################# # Training the RBM # ################################# if not os.path.isdir(output_folder): os.makedirs(output_folder) os.chdir(output_folder) # it is ok for a theano function to have no output # the purpose of train_rbm is solely to update the RBM parameters train_rbm = theano.function( [index], cost, updates=updates, givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]}, name='train_rbm') plotting_time = 0. start_time = time.clock() # go through training epochs for epoch in xrange(training_epochs): # go through the training set mean_cost = [] for batch_index in xrange(n_train_batches): mean_cost += [train_rbm(batch_index)] print 'Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost) # Plot filters after each training epoch plotting_start = time.clock() # Construct image from the weight matrix image = PIL.Image.fromarray( tile_raster_images(X=rbm.W.get_value(borrow=True).T, img_shape=(28, 28), tile_shape=(10, 10), tile_spacing=(1, 1))) image.save('filters_at_epoch_%i.png' % epoch) plotting_stop = time.clock() plotting_time += (plotting_stop - plotting_start) end_time = time.clock() pretraining_time = (end_time - start_time) - plotting_time print('Training took %f minutes' % (pretraining_time / 60.)) ################################# # Sampling from the RBM # ################################# # find out the number of test samples number_of_test_samples = test_set_x.get_value(borrow=True).shape[0] # pick random test examples, with which to initialize the persistent chain test_idx = rng.randint(number_of_test_samples - n_chains) persistent_vis_chain = theano.shared( numpy.asarray(test_set_x.get_value(borrow=True)[test_idx:test_idx + n_chains], dtype=theano.config.floatX)) plot_every = 1000 # define one step of Gibbs sampling (mf = mean-field) define a # function that does `plot_every` steps before returning the # sample for plotting [presig_hids, hid_mfs, hid_samples, presig_vis, vis_mfs, vis_samples], updates = \ theano.scan(rbm.gibbs_vhv, outputs_info=[None, None, None, None, None, persistent_vis_chain], n_steps=plot_every) # add to updates the shared variable that takes care of our persistent # chain :. updates.update({persistent_vis_chain: vis_samples[-1]}) # construct the function that implements our persistent chain. # we generate the "mean field" activations for plotting and the actual # samples for reinitializing the state of our persistent chain sample_fn = theano.function([], [vis_mfs[-1], vis_samples[-1]], updates=updates, name='sample_fn') # create a space to store the image for plotting ( we need to leave # room for the tile_spacing as well) image_data = numpy.zeros((29 * n_samples + 1, 29 * n_chains - 1), dtype='uint8') for idx in xrange(n_samples): # generate `plot_every` intermediate samples that we discard, # because successive samples in the chain are too correlated vis_mf, vis_sample = sample_fn() print ' ... plotting sample ', idx image_data[29 * idx:29 * idx + 28, :] = tile_raster_images( X=vis_mf, img_shape=(28, 28), tile_shape=(1, n_chains), tile_spacing=(1, 1)) # construct image image = PIL.Image.fromarray(image_data) image.save('samples.png') os.chdir('../')
def test_rbm(learning_rate=0.1, training_epochs=15, dataset='../data/mnist.pkl.gz', batch_size=20, n_chains=20, n_samples=10, output_folder='rbm_plots', n_hidden=500): """ Demonstrate how to train and afterwards sample from it using Theano. This is demonstrated on MNIST. :param learning_rate: learning rate used for training the RBM :param training_epochs: number of epochs used for training :param dataset: path the the pickled dataset :param batch_size: size of a batch used to train the RBM :param n_chains: number of parallel Gibbs chains to be used for sampling :param n_samples: number of samples to plot for each chain """ datasets = load_data(dataset) train_set_x, train_set_y = datasets[0] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images rng = numpy.random.RandomState(123) theano_rng = RandomStreams(rng.randint(2 ** 30)) # initialize storage for the persistent chain (state = hidden # layer of chain) persistent_chain = theano.shared(numpy.zeros((batch_size, n_hidden), dtype=theano.config.floatX), borrow=True) # construct the RBM class rbm = RBM(input=x, n_visible=28 * 28, n_hidden=n_hidden, numpy_rng=rng, theano_rng=theano_rng) # get the cost and the gradient corresponding to one step of CD-15 cost, updates = rbm.get_cost_updates(lr=learning_rate, persistent=persistent_chain, k=15) ################################# # Training the RBM # ################################# if not os.path.isdir(output_folder): os.makedirs(output_folder) os.chdir(output_folder) # it is ok for a theano function to have no output # the purpose of train_rbm is solely to update the RBM parameters train_rbm = theano.function([index], cost, updates=updates, givens={x: train_set_x[index * batch_size: (index + 1) * batch_size]}, name='train_rbm') plotting_time = 0. start_time = time.clock() # go through training epochs for epoch in xrange(training_epochs): # go through the training set mean_cost = [] for batch_index in xrange(n_train_batches): mean_cost += [train_rbm(batch_index)] print 'Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost) # Plot filters after each training epoch plotting_start = time.clock() # Construct image from the weight matrix image = PIL.Image.fromarray(tile_raster_images( X=rbm.W.get_value(borrow=True).T, img_shape=(28, 28), tile_shape=(10, 10), tile_spacing=(1, 1))) image.save('filters_at_epoch_%i.png' % epoch) plotting_stop = time.clock() plotting_time += (plotting_stop - plotting_start) end_time = time.clock() pretraining_time = (end_time - start_time) - plotting_time print ('Training took %f minutes' % (pretraining_time / 60.)) ################################# # Sampling from the RBM # ################################# # find out the number of test samples number_of_test_samples = test_set_x.get_value(borrow=True).shape[0] # pick random test examples, with which to initialize the persistent chain test_idx = rng.randint(number_of_test_samples - n_chains) persistent_vis_chain = theano.shared(numpy.asarray( test_set_x.get_value(borrow=True)[test_idx:test_idx + n_chains], dtype=theano.config.floatX)) plot_every = 1000 # define one step of Gibbs sampling (mf = mean-field) define a # function that does `plot_every` steps before returning the # sample for plotting [presig_hids, hid_mfs, hid_samples, presig_vis, vis_mfs, vis_samples], updates = \ theano.scan(rbm.gibbs_vhv, outputs_info=[None, None, None, None, None, persistent_vis_chain], n_steps=plot_every) # add to updates the shared variable that takes care of our persistent # chain :. updates.update({persistent_vis_chain: vis_samples[-1]}) # construct the function that implements our persistent chain. # we generate the "mean field" activations for plotting and the actual # samples for reinitializing the state of our persistent chain sample_fn = theano.function([], [vis_mfs[-1], vis_samples[-1]], updates=updates, name='sample_fn') # create a space to store the image for plotting ( we need to leave # room for the tile_spacing as well) image_data = numpy.zeros((29 * n_samples + 1, 29 * n_chains - 1), dtype='uint8') for idx in xrange(n_samples): # generate `plot_every` intermediate samples that we discard, # because successive samples in the chain are too correlated vis_mf, vis_sample = sample_fn() print ' ... plotting sample ', idx image_data[29 * idx:29 * idx + 28, :] = tile_raster_images( X=vis_mf, img_shape=(28, 28), tile_shape=(1, n_chains), tile_spacing=(1, 1)) # construct image image = PIL.Image.fromarray(image_data) image.save('samples.png') os.chdir('../')