def test_dA(learning_rate=0.01, training_epochs=15000,
            dataset='mnist.pkl.gz', batch_size=5, output_folder='dA_plots'):
    """
    This demo is tested on MNIST

    :type learning_rate: float
    :param learning_rate: learning rate used for training the DeNoising
                          AutoEncoder

    :type training_epochs: int
    :param training_epochs: number of epochs used for training

    :type dataset: string
    :param dataset: path to the pickled dataset

    """
    ##datasets = load_data(dataset)
    #from SdA_mapping import load_data_half
    #datasets = load_data_half(dataset)
    print 'loading data'
    datasets, x_mean, y_mean, x_std, y_std = load_vc()
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    print 'loaded data'

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x1 = T.matrix('x1')    # the data is presented as rasterized images
    x2 = T.matrix('x2')    # the data is presented as rasterized images
    cor_reg = T.scalar('cor_reg')

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    ####################################
    # BUILDING THE MODEL NO CORRUPTION #
    ####################################
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    #da = dA_joint(
    #    numpy_rng=rng,
    #    theano_rng=theano_rng,
    #    input1=x1,
    #    input2=x2,
    #    n_visible1=28 * 28 / 2,
    #    n_visible2=28 * 28 / 2,
    #    n_hidden=500
    #)
    print 'initialize functions'

    da = dA_joint(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input1=x1,
        input2=x2,
        cor_reg=cor_reg,
        #n_visible1=28 * 28 / 2,
        #n_visible2=28 * 28 / 2,
        n_visible1=24,
        n_visible2=24,
        n_hidden=50
    )

    cost, updates = da.get_cost_updates(
        corruption_level=0.3,
        learning_rate=learning_rate
    )
    cor_reg_val = numpy.float32(5.0)

    train_da = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x1: train_set_x[index * batch_size: (index + 1) * batch_size],
            x2: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # hidden representations of the two views on the test and train sets
    fprop_x1 = theano.function(
        [],
        outputs=da.output1,
        givens={x1: test_set_x},
        name='fprop_x1'
    )
    fprop_x2 = theano.function(
        [],
        outputs=da.output2,
        givens={x2: test_set_y},
        name='fprop_x2'
    )
    fprop_x1t = theano.function(
        [],
        outputs=da.output1,
        givens={x1: train_set_x},
        name='fprop_x1t'
    )
    fprop_x2t = theano.function(
        [],
        outputs=da.output2,
        givens={x2: train_set_y},
        name='fprop_x2t'
    )
    # reconstructions of the two views on the test set
    rec_x1 = theano.function(
        [],
        outputs=da.rec1,
        givens={x1: test_set_x},
        name='rec_x1'
    )
    rec_x2 = theano.function(
        [],
        outputs=da.rec2,
        givens={x2: test_set_y},
        name='rec_x2'
    )
    fprop_x1_to_x2 = theano.function(
        [],
        outputs=da.reg,
        givens={x1: test_set_x},
        name='fprop_x12x2'
    )
    # increase the correlation-regularization weight by 0.1 each epoch
    updates_reg = [
        (da.cor_reg, da.cor_reg + theano.shared(numpy.float32(0.1)))
    ]
    update_reg = theano.function(
        [],
        updates=updates_reg
    )
    print 'initialize functions ended'

    start_time = time.clock()

    ############
    # TRAINING #
    ############
    print 'training started'

    # de-normalize the test data so errors are reported in the original scale
    X1 = test_set_x.eval()
    X1 *= x_std
    X1 += x_mean
    X2 = test_set_y.eval()
    X2 *= y_std
    X2 += y_mean

    from dcca_numpy import cor_cost

    # go through training epochs
    for epoch in xrange(training_epochs):
        # go through training set
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))
        #cor_reg_val += 1
        #da.cor_reg = theano.shared(cor_reg_val)
        update_reg()

        X1H = rec_x1()
        X2H = rec_x2()
        X1H *= x_std
        X1H += x_mean
        X2H *= y_std
        X2H += y_mean

        H1 = fprop_x1()
        H2 = fprop_x2()

        print 'Training epoch'
        print 'Reconstruction ', numpy.mean(numpy.mean((X1H - X1) ** 2, 1)), \
            numpy.mean(numpy.mean((X2H - X2) ** 2, 1))

        if epoch % 5 == 2:  # pretrain middle layer
            print '... pre-training MIDDLE layer'
            H1t = fprop_x1t()
            H2t = fprop_x2t()

            h1 = T.matrix('x')  # hidden representation of the first view
            h2 = T.matrix('y')  # target hidden representation of the second view
            from mlp import HiddenLayer
            numpy_rng = numpy.random.RandomState(89677)
            log_reg = HiddenLayer(numpy_rng, h1, 50, 50, activation=T.tanh)

            if 1:  # for middle layer
                learning_rate = 0.1
                #H1 = theano.shared(H1)
                #H2 = theano.shared(H2)
                # compute the gradients with respect to the model parameters
                logreg_cost = log_reg.mse(h2)

                gparams = T.grad(logreg_cost, log_reg.params)

                # compute list of fine-tuning updates
                updates = [
                    (param, param - gparam * learning_rate)
                    for param, gparam in zip(log_reg.params, gparams)
                ]

                train_fn_middle = theano.function(
                    inputs=[],
                    outputs=logreg_cost,
                    updates=updates,
                    givens={
                        h1: theano.shared(H1t),
                        h2: theano.shared(H2t)
                    },
                    name='train_middle'
                )
            middle_epoch = 0
            while middle_epoch < 100:
                print middle_epoch, train_fn_middle()
                middle_epoch += 1

            ##X2H = fprop_x1_to_x2()
            X2H = numpy.tanh(H1.dot(log_reg.W.eval()) + log_reg.b.eval())
            X2H = numpy.tanh(X2H.dot(da.W2_prime.eval()) + da.b2_prime.eval())
            X2H *= y_std
            X2H += y_mean
            print 'Regression ', numpy.mean(numpy.mean((X2H - X2) ** 2, 1))

        print 'Correlation ', cor_cost(H1, H2)

    end_time = time.clock()

    training_time = (end_time - start_time)

    print >> sys.stderr, ('The no corruption code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % (training_time / 60.))
    image = Image.fromarray(
        tile_raster_images(X=da.W1.get_value(borrow=True).T,
                           img_shape=(28, 14), tile_shape=(10, 10),
                           tile_spacing=(1, 1)))
    image.save('filters_corruption_0.png')

    from matplotlib import pyplot as pp
    pp.plot(H1[:10, :2], 'b')
    pp.plot(H2[:10, :2], 'r')
    pp.show()

    print cor_cost(H1, H2)  # final correlation between the two hidden views
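

# A minimal smoke-test wrapper (an assumption, not part of the original
# script): it calls test_dA() above with a tiny epoch count so that data
# loading via load_vc(), graph compilation and a few update steps can be
# checked before launching the full 15000-epoch run. The output folder name
# is hypothetical.
def quick_test_dA(epochs=5):
    test_dA(learning_rate=0.01, training_epochs=epochs, batch_size=5,
            output_folder='dA_plots_smoke')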
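

# Small helper sketch (an assumption; both demos inline this arithmetic):
# undo the zero-mean / unit-variance normalisation whose statistics are
# returned by load_vc(), so that reconstruction and regression errors are
# reported in the original feature scale.
def denormalize(X, mean, std):
    X = X * std    # rescale
    X = X + mean   # shift back to the original mean
    return X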
def test_SdA_regress(finetune_lr=0.05, pretraining_epochs=10,
                     pretrain_lr=0.1, training_epochs=10000,
                     dataset='mnist.pkl.gz', batch_size=20):
    datasets = load_data_half(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    train_set_x = train_set_x.eval()
    train_set_y = train_set_y.eval()

    import theano
    # labelled and unlabelled splits (currently identical copies of the set)
    train_set_x_lab = train_set_x[:, :]
    train_set_x_unlab = train_set_x[:, :]
    train_set_y_lab = train_set_y[:, :]
    train_set_y_unlab = train_set_y[:, :]
    train_set_x_lab = theano.shared(numpy.asarray(train_set_x_lab,
                                                  dtype=theano.config.floatX),
                                    borrow=True)
    train_set_y_lab = theano.shared(numpy.asarray(train_set_y_lab,
                                                  dtype=theano.config.floatX),
                                    borrow=True)
    train_set_x_unlab = theano.shared(numpy.asarray(train_set_x_unlab,
                                                    dtype=theano.config.floatX),
                                      borrow=True)
    train_set_y_unlab = theano.shared(numpy.asarray(train_set_y_unlab,
                                                    dtype=theano.config.floatX),
                                      borrow=True)

    # compute number of minibatches for training, validation and testing
    n_train_batches_l = train_set_y_lab.eval().shape[0]
    n_train_batches_l /= batch_size
    n_train_batches_u = train_set_y_unlab.eval().shape[0]
    n_train_batches_u /= batch_size
    #n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    #n_train_batches /= batch_size

    # numpy random generator
    # start-snippet-3
    numpy_rng = numpy.random.RandomState(89677)
    print '... building the model'
    # construct the stacked denoising autoencoder class
    #from SdA_orig import SdA as SdA_old
    hidden_layer_size = 100
    SdA_inp = SdA(
        numpy_rng,
        n_ins=392,
        hidden_layers_sizes=[hidden_layer_size]
    )
    SdA_out = SdA(
        numpy_rng,
        n_ins=392,
        hidden_layers_sizes=[hidden_layer_size]
    )

    # PRETRAINING THE MODEL #
    if 0:  # pretrain inp ae
        print '... getting the pretraining functions for INPUT AE'
        pretraining_fns = SdA_inp.pretraining_functions(
            train_set_x=train_set_x_unlab,
            batch_size=batch_size)

        print '... pre-training the model'
        start_time = time.clock()
        ## Pre-train layer-wise
        corruption_levels = [.1, .2, .3]
        for i in xrange(SdA_inp.n_layers):
            # go through pretraining epochs
            for epoch in xrange(pretraining_epochs):
                # go through the training set
                c = []
                for batch_index in xrange(n_train_batches_u):
                    c.append(pretraining_fns[i](index=batch_index,
                                                corruption=corruption_levels[i],
                                                lr=pretrain_lr))
                print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
                print numpy.mean(c)

        end_time = time.clock()

        print >> sys.stderr, ('The pretraining code for file ' +
                              os.path.split(__file__)[1] +
                              ' ran for %.2fm' % ((end_time - start_time) / 60.))

    if 0:  # pretrain out ae
        print '... getting the pretraining functions for OUTPUT AE'
        pretraining_fns = SdA_out.pretraining_functions(
            train_set_x=train_set_y_unlab,
            batch_size=batch_size)

        print '... pre-training the model'
        start_time = time.clock()
        ## Pre-train layer-wise
        corruption_levels = [.5, .2, .3]
        for i in xrange(SdA_out.n_layers):
            # go through pretraining epochs
            for epoch in xrange(pretraining_epochs):
                # go through the training set
                c = []
                for batch_index in xrange(n_train_batches_u):
                    c.append(pretraining_fns[i](index=batch_index,
                                                corruption=corruption_levels[i],
                                                lr=pretrain_lr))
                print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
                print numpy.mean(c)

        end_time = time.clock()

        print >> sys.stderr, ('The pretraining code for file ' +
                              os.path.split(__file__)[1] +
                              ' ran for %.2fm' % ((end_time - start_time) / 60.))

    if 0:  # save aes
        f = open('aes_shallow_sig_nobias.pkl', 'w+')
        import pickle
        pickle.dump(SdA_inp, f)
        pickle.dump(SdA_out, f)
        f.flush()
        f.close()

    if 0:  # load aes
        f = open('aes_shallow_sig_nobias.pkl', 'r')
        import pickle
        SdA_inp = pickle.load(f)
        SdA_out = pickle.load(f)
        f.close()

    if 1:  # cca
        from dcca_numpy import netCCA_nobias, netCCA, dCCA
        from mlp_numpy import (expit, logistic_prime, linear, linear_prime,
                               relu, relu_prime, tanh, tanh_prime)

        train_y1 = train_set_x_lab.eval()
        train_y2 = train_set_y_lab.eval()
        test_y1 = test_set_x.eval()
        test_y2 = test_set_y.eval()

        ##param1 = ((train_y1.shape[1], 0, 0), (2038, relu, relu_prime), (50, relu, relu_prime))
        ##param2 = ((train_y2.shape[1], 0, 0), (1608, relu, relu_prime), (50, relu, relu_prime))
        param1 = ((train_y1.shape[1], 0, 0),
                  (hidden_layer_size, expit, logistic_prime))
        param2 = ((train_y2.shape[1], 0, 0),
                  (hidden_layer_size, expit, logistic_prime))

        # initialize the numpy CCA networks with the pretrained AE weights
        W1s = []
        b1s = []
        for i in range(len(SdA_inp.dA_layers)):
            W1s.append(SdA_inp.dA_layers[i].W.T.eval())
            ##b1s.append(SdA_inp.dA_layers[i].b.eval())
            ##b1s[-1] = b1s[-1].reshape((b1s[-1].shape[0], 1))
        W2s = []
        b2s = []
        for i in range(len(SdA_out.dA_layers)):
            W2s.append(SdA_out.dA_layers[i].W.T.eval())
            ##b2s.append(SdA_out.dA_layers[i].b.eval())
            ##b2s[-1] = b2s[-1].reshape((b2s[-1].shape[0], 1))

        numpy.random.seed(0)
        N1 = netCCA_nobias(train_y1, param1, W1s)
        N2 = netCCA_nobias(train_y2, param2, W2s)
        N = dCCA(train_y1, train_y2, N1, N2)
        N1.reconstruct(test_set_x.eval()[0, :])
        cnt = 0
        from dcca_numpy import cca_cost, cca, order_cost, cor_cost
        while True:
            X = N1.predict(test_set_x.eval())
            Y = N2.predict(test_set_y.eval())
            _H1 = numpy.dot(X, N.A1)
            _H2 = numpy.dot(Y, N.A2)
            print '****', cnt, cor_cost(_H1, _H2)
            X1_rec = numpy.tanh(X.dot(N1.weights[0]))
            X2_rec = numpy.tanh(Y.dot(N2.weights[0]))

            param = ((hidden_layer_size, 0, 0),
                     (hidden_layer_size, relu, relu_prime))
            from mlp_numpy import NeuralNetwork as NN
            lr = NN(X, Y, param)
            lr.train(X[:, :], Y[:, :], 10, 0.005)
            Yh = lr.predict(X[:, :])
            X2_reg = N2.fs[-1](numpy.dot(Yh, N2.weights[0]))
            #X2_reg = N2.fs[-1](numpy.dot(_H1.dot(numpy.linalg.inv(N.A1)), N2.weights[0]))

            print '****', 'mse1:', numpy.mean((X1_rec - test_set_x.eval()) ** 2.0)
            print '****', 'mse2:', numpy.mean((X2_rec - test_set_y.eval()) ** 2.0)
            print '****', 'mse_map:', numpy.mean((X2_reg - test_set_y.eval()) ** 2.0)

            if cnt % 2:
                N.train(5, True, 10000.0)
            else:
                N.train(5, False, 10000.0)
            cnt += 1

            f = open('netcca.pkl', 'w+')
            import pickle
            pickle.dump(N, f)
            pickle.dump(N, f)
            f.flush()
            f.close()

            if cnt == 200:
                break

        # copy the CCA-trained weights and biases back into the Theano SdAs
        for i in range(len(SdA_inp.dA_layers)):
            SdA_inp.dA_layers[i].W = theano.shared(N1.weights[i].T)
            SdA_inp.dA_layers[i].b = theano.shared(N1.biases[i][:, 0])

        for i in range(len(SdA_out.dA_layers)):
            SdA_out.dA_layers[i].W = theano.shared(N2.weights[i].T)
            SdA_out.dA_layers[i].b = theano.shared(N2.biases[i][:, 0])

    if 1:  # pretrain middle layer
        print '... pre-training MIDDLE layer'

        h1 = T.matrix('x')  # hidden representation of the input-side SdA
        h2 = T.matrix('y')  # target hidden representation of the output-side SdA
        log_reg = HiddenLayer(numpy_rng, h1, hidden_layer_size,
                              hidden_layer_size)

        if 1:  # for middle layer
            learning_rate = 0.01

            fprop_inp = theano.function(
                [],
                SdA_inp.sigmoid_layers[-1].output,
                givens={
                    SdA_inp.sigmoid_layers[0].input: train_set_x_lab
                },
                name='fprop_inp'
            )
            fprop_out = theano.function(
                [],
                SdA_out.sigmoid_layers[-1].output,
                givens={
                    SdA_out.sigmoid_layers[0].input: train_set_y_lab
                },
                name='fprop_out'
            )

            #H11 = fprop_inp()
            #H21 = fprop_out()
            ##H1 = N1.predict(train_set_x.eval())
            ##H2 = N2.predict(train_set_y.eval())
            H1 = fprop_inp()
            H2 = fprop_out()
            H1 = theano.shared(H1)
            H2 = theano.shared(H2)

            # compute the gradients with respect to the model parameters
            logreg_cost = log_reg.mse(h2)

            gparams = T.grad(logreg_cost, log_reg.params)

            # compute list of fine-tuning updates
            updates = [
                (param, param - gparam * learning_rate)
                for param, gparam in zip(log_reg.params, gparams)
            ]

            train_fn_middle = theano.function(
                inputs=[],
                outputs=logreg_cost,
                updates=updates,
                givens={
                    h1: H1,
                    h2: H2
                },
                name='train_middle'
            )

        epoch = 0
        while epoch < 10:
            print epoch, train_fn_middle()
            epoch += 1

    sda = SdA_regress(
        SdA_inp,
        SdA_out,
        log_reg,
        numpy_rng=numpy_rng,
        n_inp=28 * 28 // 2,
        hidden_layers_sizes_inp=[hidden_layer_size],
        hidden_layers_sizes_out=[hidden_layer_size],
        n_out=28 * 28 // 2
    )
    # end-snippet-3 start-snippet-4
    # end-snippet-4

    # FINETUNING THE MODEL #
    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'
    train_fn, validate_model, test_model = sda.build_finetune_functions(
        datasets=datasets,
        batch_size=batch_size,
        learning_rate=finetune_lr
    )

    print '... finetuning the model'
    # early-stopping parameters
    patience = 10 * n_train_batches_l  # look at this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches_l, patience / 2)
                                   # go through this many minibatches before
                                   # checking the network on the validation
                                   # set; in this case we check every epoch

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    fprop = theano.function(
        [],
        sda.sigmoid_layers[-1].output,
        givens={
            sda.x: test_set_x
        },
        name='fprop'
    )

    while True:
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches_l):
            minibatch_avg_cost = train_fn(minibatch_index)
            iter = (epoch - 1) * n_train_batches_l + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches_l,
                       this_validation_loss))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches_l,
                           test_score))

            if patience <= iter:
                done_looping = True
                #break

        if 0:  # vis weights
            fprop = theano.function(
                [],
                sda.sigmoid_layers[-1].output,
                givens={
                    sda.x: test_set_x
                },
                name='fprop'
            )
            yh = fprop()
            yh = yh

    end_time = time.clock()
    print(
        (
            'Optimization complete with best validation score of %f %%, '
            'on iteration %i, '
            'with test performance %f %%'
        )
        % (best_validation_loss, best_iter + 1, test_score)
    )
    print >> sys.stderr, ('The training code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
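

# Hedged entry point (an assumption; the original section does not show a
# __main__ guard): run one of the two demos. The epoch counts here are kept
# small so a first run finishes quickly; raise them for a real experiment.
if __name__ == '__main__':
    test_dA(training_epochs=50)
    #test_SdA_regress(pretraining_epochs=10, training_epochs=100)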