def sample_rbm(rbm, dataset=dataset, n_chains=20, n_samples=10,
               output_folder=location + 'rbm_plots'):
    """Sample from ``rbm`` with parallel Gibbs chains and save the result as an image.

    ``n_chains`` chains are initialised from consecutive test examples, starting
    at an offset drawn from a fixed-seed generator.  Every call to the compiled
    sampling function advances all chains by ``plot_every`` Gibbs steps; the
    visible-unit means after each call form one row of tiles in the output
    image, which is written to ``sample.png`` inside ``output_folder``.

    Parameters
    ----------
    rbm
        Object whose ``gibbs_vhv`` method is used as the ``theano.scan`` step.
        It has to produce six outputs per step, the last one being fed back
        as the next visible state.
    dataset
        Passed unchanged to ``load_data``; element 2 of the result is used as
        the ``(inputs, labels)`` test split.
    n_chains
        Number of parallel chains (tiles per image row).  Must be smaller
        than the number of test examples because the starting offset is
        drawn with ``randint(n_test_samples - n_chains)``.
    n_samples
        Number of image rows, i.e. number of calls to the sampling function.
    output_folder
        Directory receiving ``sample.png``; created when missing.

    Notes
    -----
    The working directory is switched to ``output_folder`` while the function
    runs (``load_data`` is still called from inside it, as before).  It is
    restored to the directory that was current on entry, even when an error
    occurs.  Previously the function finished with ``os.chdir('../')``, which
    only returned to the starting directory for a single-level relative
    ``output_folder`` and left the process elsewhere for nested or absolute
    paths, or after an exception.
    """
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)

    # Remember the caller's directory so it can be restored reliably.
    original_cwd = os.getcwd()
    os.chdir(output_folder)
    try:
        datasets = load_data(dataset)
        # Only the inputs of the test split are needed; labels are ignored.
        test_set_x = shared_data(datasets[2][0])

        # --- Sampling from the RBM ---
        print('...start sampling')
        rng = numpy.random.RandomState(123)
        n_test_samples = test_set_x.get_value(borrow=True).shape[0]

        # Starting row of the block of test examples that seeds the chains.
        test_idx = rng.randint(n_test_samples - n_chains)
        persistent_vis_chain = theano.shared(
            numpy.asarray(
                test_set_x.get_value(borrow=True)[test_idx:test_idx + n_chains],
                dtype=theano.config.floatX
            )
        )

        # Number of Gibbs steps performed by each call to sample_fn.
        plot_every = 1000
        # Only the last scan output is recurrent (seeded with the persistent
        # chain); the other five are collected without being fed back.
        ([pre_h, h_m, h_samples, pre_v, v_m, v_samples], updates) = theano.scan(
            rbm.gibbs_vhv,
            outputs_info=[None, None, None, None, None, persistent_vis_chain],
            n_steps=plot_every
        )

        # Carry the final visible sample over to the next call so that
        # successive calls continue the same chains.
        updates.update({persistent_vis_chain: v_samples[-1]})
        sample_fn = theano.function([], [v_m[-1], v_samples[-1]],
                                    updates=updates, name='sample_fn')

        # One 28-pixel-high row of tiles per sample, separated by 1 pixel.
        img_data = numpy.zeros((29 * n_samples + 1, 29 * n_chains - 1),
                               dtype='uint8')
        for idx in range(n_samples):
            # Separate names keep the symbolic v_m above from being rebound.
            vis_mean, vis_sample = sample_fn()
            print("plot sample %d" % idx)
            img_data[29 * idx:29 * idx + 28, :] = tile_raster_images(
                X=vis_mean,
                img_shape=(28, 28),
                tile_shape=(1, n_chains),
                tile_spacing=(1, 1)
            )

        image = Image.fromarray(img_data)
        image.save('sample.png')
    finally:
        os.chdir(original_cwd)
def train_DBN(dataset=dataset):
    """Pre-train two stacked RBMs layer by layer, then fine-tune them as a DBN.

    Steps, in order:

    1. Load the train / validation / test splits with ``load_data``.
    2. Train ``rbm1`` (784 visible, 529 hidden units) on the training inputs.
    3. Push the training inputs through ``rbm1.prob_up`` and train ``rbm2``
       (529 visible, 529 hidden units) on the second returned value.
    4. Build a ``DBN`` from both RBMs' weights and hidden biases plus a
       10-way ``LogisticRegression`` layer and train it with
       ``train_classifier``.

    The three values returned by the training helpers are summed, divided by
    60 and printed (presumably seconds reported as minutes -- confirm against
    ``train_rbm`` / ``train_classifier``).

    Parameters
    ----------
    dataset
        Passed unchanged to ``load_data(dataset, shared=True)``.
    """
    splits = load_data(dataset, shared=True)
    train_x, train_y = splits[0]
    valid_x, valid_y = splits[1]
    test_x, test_y = splits[2]

    # Symbolic placeholders handed to every training helper below.
    batch_index = T.lscalar()
    inputs = T.matrix('x')
    labels = T.ivector('y')

    # One numpy generator is shared by both RBMs; its first draw seeds the
    # Theano random stream, so the construction order below matters.
    numpy_rng = numpy.random.RandomState(123)
    stream_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    rbm1 = RBM(input=inputs, numpy_rng=numpy_rng, theano_rng=stream_rng,
               n_visible=784, n_hidden=529)
    rbm2 = RBM(input=inputs, numpy_rng=numpy_rng, theano_rng=stream_rng,
               n_visible=529, n_hidden=529)
    lgs = LogisticRegression(input=inputs, n_in=529, n_out=10)

    print('...train rbm1')
    elapsed_rbm1 = train_rbm(rbm1, batch_index, inputs, train_x, 'rbm1')
    # Alternative left by the original author: load a pickled model instead,
    # rbm1 = cPickle.load( open(location + 'rbm1.pkl', 'r'))

    print('...train rbm2')
    # prob_up returns a pair; only its second element is used as the
    # training input of the next layer.
    first_layer_out = rbm1.prob_up(train_x)[1]
    rbm2_inputs = shared_data(first_layer_out.eval())
    elapsed_rbm2 = train_rbm(rbm2, batch_index, inputs, rbm2_inputs, 'rbm2')
    # Alternative left by the original author: load a pickled model instead,
    # rbm2 = cPickle.load( open(location + 'rbm2.pkl', 'r'))

    print('...dbn fintuning')
    dbn = DBN(inputs,
              [rbm1.W, rbm2.W],
              [rbm1.hbias, rbm2.hbias],
              lgs.W,
              lgs.b)
    labelled_splits = [[train_x, train_y],
                       [valid_x, valid_y],
                       [test_x, test_y]]
    elapsed_dbn = train_classifier(dbn, batch_index, inputs, labels,
                                   datasets=labelled_splits)

    total_elapsed = elapsed_rbm1 + elapsed_rbm2 + elapsed_dbn
    print('Train DBN total cost %.2f' % (total_elapsed / 60.))
def StackedDA_sgd(dataset=dataset):
    """Pre-train two stacked denoising autoencoders, then fine-tune a classifier.

    Steps, in order:

    1. Load the train / validation / test splits with ``load_data``.
    2. Train ``da1`` (784 visible, 529 hidden units) on the training inputs
       with corruption level 0.3.
    3. Evaluate ``da1.get_hidden_value`` on the training inputs and train
       ``da2`` (529 visible, 484 hidden units) on the result with corruption
       level 0.15.
    4. Build a ``StackedDA`` from both autoencoders' ``W`` and ``b`` plus a
       10-way ``LogisticRegression`` layer and train it with
       ``train_classifier``.

    The three values returned by the training helpers are summed, divided by
    60 and printed (presumably seconds reported as minutes -- confirm against
    ``train_da`` / ``train_classifier``).

    Parameters
    ----------
    dataset
        Passed unchanged to ``load_data(dataset, shared=True)``.
    """
    print('...load data')
    splits = load_data(dataset, shared=True)
    train_x, train_y = splits[0]
    valid_x, valid_y = splits[1]
    test_x, test_y = splits[2]

    print('...build model')
    batch_index = T.lscalar()   # index to a [mini]batch
    inputs = T.matrix('x')      # data, presented as rasterized images
    labels = T.ivector('y')     # labels, presented as 1D vector of [int] labels

    # One numpy generator is shared by both autoencoders; its first draw
    # seeds the Theano random stream, so the construction order matters.
    numpy_rng = numpy.random.RandomState(123)
    stream_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    da1 = DA(numpy_rng=numpy_rng, theano_rng=stream_rng, input=inputs,
             n_visible=784, n_hide=529)
    da2 = DA(numpy_rng=numpy_rng, theano_rng=stream_rng, input=inputs,
             n_visible=529, n_hide=484)
    lgs = LogisticRegression(input=inputs, n_in=484, n_out=10)

    # Hyper-parameters common to both pre-training runs.
    pretrain_options = dict(learning_rate=0.1, training_epochs=15,
                            batch_size=20)

    print('...train 1st Da')
    elapsed_da1 = train_da(da1, batch_index, inputs, train_x, 'da1_30.png',
                           corruption_level=0.3, **pretrain_options)

    print('...train 2ed Da')
    # The second autoencoder is trained on the first one's hidden values.
    first_layer_out = da1.get_hidden_value(train_x)
    da2_inputs = shared_data(first_layer_out.eval())
    elapsed_da2 = train_da(da2, batch_index, inputs, da2_inputs, 'da2_30.png',
                           corruption_level=0.15, **pretrain_options)

    print('...fine-tuning')
    stacked_da = StackedDA(inputs,
                           [da1.W, da2.W],
                           [da1.b, da2.b],
                           lgs.W,
                           lgs.b)
    labelled_splits = [[train_x, train_y],
                       [valid_x, valid_y],
                       [test_x, test_y]]
    elapsed_sda = train_classifier(stacked_da, batch_index, inputs, labels,
                                   labelled_splits)

    total_elapsed = elapsed_da1 + elapsed_da2 + elapsed_sda
    print('Tiraning Total cost time %.2fm' % (total_elapsed / 60.))