def test_autoencoder(): learning_rate = 0.1 training_epochs = 30 batch_size = 20 datasets = load_data('data/mnist.pkl.gz') train_set_x = datasets[0][0] # ミニバッチの数(教師データをbatch数で割るだけ) n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size # ミニバッチのindexシンボル index = T.lscalar() # ミニバッチの学習データシンボル x = T.matrix('x') rng = np.random.RandomState(123) theano_rng = RandomStreams(rng.randint(2 ** 30)) # autoencoder モデル da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x, n_visible=28*28, n_hidden=500) # コスト関数と更新式のシンボル cost, updates = da.get_cost_updates(corruption_level=0.0, learning_rate=learning_rate) # trainingの関数 train_da = theano.function([index], cost, updates=updates, givens={ x : train_set_x[index*batch_size : (index+1)*batch_size] }) fp = open("log/ae_cost.txt", "w") # training start_time = time.clock() for epoch in xrange(training_epochs): c = [] for batch_index in xrange(n_train_batches): c.append(train_da(batch_index)) print 'Training epoch %d, cost ' % epoch, np.mean(c) fp.write('%d\t%f\n' % (epoch, np.mean(c))) end_time = time.clock() training_time = (end_time - start_time) fp.close() print "The no corruption code for file " + os.path.split(__file__)[1] + " ran for %.2fm" % ((training_time / 60.0)) image = Image.fromarray(tile_raster_images( X=da.W.get_value(borrow=True).T, img_shape=(28, 28), tile_shape=(10, 10), tile_spacing=(1, 1))) image.save('log/dae_filters_corruption_00.png')
def nn_stochastic_gradient_descent(dataset=r'..\data\mnist.pkl.gz', n_epochs=100, alpha=0.01):
    """Train an ``NN`` with per-sample gradient updates and report error rates.

    :param dataset: path handed to ``load_data``; it must yield the
        ``(train_set, valid_set, test_set)`` triple.
    :param n_epochs: number of full passes over the training set.
    :param alpha: step size passed to ``nn.update_weights``.

    After every epoch one tab-separated line is printed with the epoch
    number and the training, validation and test errors as percentages.
    Returns nothing.
    """
    train_set, valid_set, test_set = load_data(dataset)

    # Initialize neural network (784 inputs, 100 hidden units, 10 outputs).
    nn = NN(numpy.random, 28 * 28, 100, 10)

    # Print header. The string is not %-formatted, so a single '%' is the
    # correct way to show a percent sign ('%%' would be printed literally).
    print('Epoch\tTrainigError%\tValidationError%\tTestError%')

    x, y = train_set

    # Train network for limited number of epochs.
    for epoch in xrange(n_epochs):
        for i in xrange(x.shape[0]):
            # Present each example as a column vector.
            sample = x[i].reshape(x.shape[1], 1)
            nn.forward(sample)
            nn.backward(y[i])
            nn.update_weights(alpha)

        # Measure accuracy on all data sets; only the first element of each
        # result (the error rate) is used here.
        train_error = nn.test(train_set)[0]
        valid_error = nn.test(valid_set)[0]
        test_error = nn.test(test_set)[0]
        print('%d\t%f\t%f\t%f' % (epoch, 100 * train_error,
                                  100 * valid_error, 100 * test_error))
import plot
import logistic_regression as lr

# The two iris species shown in the plots; a fresh list is handed to each
# plotting call, exactly as the original literals did.
species = ('Iris-setosa', 'Iris-versicolor')

# Load the data and show it as a scatter plot.
data = lr.load_data('iris_data.csv')
plot.scatter_plot(data, list(species))

# Separate samples from labels, then carve out training and testing sets.
X, y = lr.split(data)
X_train, X_test, y_train, y_test = lr.train_test_split(X, y)

# Run SGD on the training portion to obtain theta.
theta = lr.SGD(X_train, y_train)
print('\nCalculated theta:\n {}'.format(theta))

# Evaluate the model on the held-out data.
hypothesis = lr.predict(X_test, theta)
lr.accuracy(hypothesis, y_test)

# Plot the decision boundary for the calculated theta.
plot.boundary(data, list(species), theta)
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             dataset='data/mnist.pkl.gz', batch_size=20, n_hidden=500):
    """Train an ``MLP`` with minibatch gradient descent and early stopping.

    :param learning_rate: step size used in the parameter updates.
    :param L1_reg: weight of ``classifier.L1`` in the cost.
    :param L2_reg: weight of ``classifier.L2_sqr`` in the cost.
    :param n_epochs: maximum number of epochs to run.
    :param dataset: path handed to ``logistic_regression.load_data``.
    :param batch_size: number of examples per minibatch.
    :param n_hidden: number of hidden units of the MLP.

    Progress is printed to stdout, the elapsed time to stderr. Returns
    nothing.
    """
    datasets = logistic_regression.load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Floor division keeps the batch counts integral (xrange needs ints)
    # regardless of whether true division is in effect.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # Symbolic minibatch index.
    index = T.lscalar()
    # Symbolic matrix of input examples.
    x = T.matrix('x')
    # Symbolic 1-D integer vector of labels.
    y = T.ivector('y')

    # Random number generator.
    rng = np.random.RandomState(1234)

    # Build the MLP.
    classifier = MLP(rng=rng, input=x, n_in=28 * 28,
                     n_hidden=n_hidden, n_out=10)

    # Symbolic cost: negative log likelihood plus the regularization terms.
    cost = (classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)

    # Error rate of one test minibatch.
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        })

    # Error rate of one validation minibatch.
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        })

    # Back propagation: gradient of the cost w.r.t. every parameter.
    gparams = [T.grad(cost, param) for param in classifier.params]

    # One (parameter, new value) pair per parameter; zip walks the
    # parameters and their gradients in lockstep.
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # The training function returns the cost and applies `updates`.
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        })

    print('... training')

    # Early-stopping state.
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            train_model(minibatch_index)
            # Global count of minibatches processed before this one.
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # Mean error rate over all validation minibatches.
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f ' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:
                    # Extend patience only for a significant improvement.
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # Mean error rate over all test minibatches.
                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = np.mean(test_losses)
                    print('epoch %i, minibatch %i/%i, test error %f ' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('optimization complete. Best validation score of %f obtained at iteration %i, with test performance %f')
          % (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('This code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=20, n_hidden=500):
    """
    Demonstrate stochastic gradient descent optimization for a multilayer
    perceptron

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
                   regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
                   regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                    http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    :type batch_size: int
    :param batch_size: number of examples in each minibatch

    :type n_hidden: int
    :param n_hidden: number of hidden units of the MLP

    Besides printing progress, the function writes the final weights and
    biases of both layers to ``hidden_weights.txt``, ``hidden_biases.txt``,
    ``output_weights.txt`` and ``output_biases.txt``. It returns nothing.
    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing;
    # floor division keeps the counts integral (xrange needs ints) whether
    # or not true division is in effect
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    classifier = MLP(
        rng=rng,
        input=x,
        n_in=28 * 28,
        n_hidden=n_hidden,
        n_out=10
    )

    # start-snippet-4
    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )
    # end-snippet-4

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # start-snippet-5
    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs; zip pairs every parameter with
    # its own gradient
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-5

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # go through this many minibatches before checking the network on the
    # validation set
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            train_model(minibatch_index)
            # iteration number
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience,
                                       iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    # dump the trained parameters of both layers as plain text
    numpy.savetxt('hidden_weights.txt',
                  classifier.hiddenLayer.W.get_value())
    numpy.savetxt('hidden_biases.txt',
                  classifier.hiddenLayer.b.get_value())
    numpy.savetxt('output_weights.txt',
                  classifier.logRegressionLayer.W.get_value())
    numpy.savetxt('output_biases.txt',
                  classifier.logRegressionLayer.b.get_value())
def get_data(dataset):
    """Write the third split returned by ``load_data(dataset)`` to text files.

    The inputs are read with ``get_value()`` and saved to ``test_set_x.txt``;
    the labels are obtained with ``eval()`` and saved to ``test_set_y.txt``.
    Returns nothing.
    """
    test_split = load_data(dataset)[2]
    inputs, labels = test_split

    numpy.savetxt('test_set_x.txt', inputs.get_value())
    numpy.savetxt('test_set_y.txt', labels.eval())
def optimize_cnn_lenet(learning_rate=0.01, n_epochs=200, dataset='data/mnist.pkl.gz', batch_size=500, n_hidden=500, nkerns=[20, 50], rng=np.random.RandomState(23455)):
    """Train a LeNet-style CNN built from separate conv and pool layers.

    :param learning_rate: step size used in the parameter updates.
    :param n_epochs: maximum number of epochs to run.
    :param dataset: path handed to ``load_data``.
    :param batch_size: number of examples per minibatch (also baked into the
        layer shapes).
    :param n_hidden: number of units of the fully connected hidden layer.
    :param nkerns: number of kernels of the first and second conv layer.
        Only indexed, never mutated, inside this function.
    :param rng: random state used to initialise the layers. NOTE: the default
        instance is created once, when the function is defined, so calls that
        rely on the default share (and advance) the same generator.

    Validation and test error rates are logged to
    ``log/lenet_validation_error.txt`` and ``log/lenet_test_error.txt``; the
    trained layers 0, 2, 4 and 5 are pickled into ``model/``. Returns nothing.
    """
    print('... load training set')
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Floor division keeps the batch counts integral (xrange needs ints)
    # regardless of whether true division is in effect.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # Symbolic minibatch index.
    index = T.lscalar()
    # Symbolic data.
    x = T.matrix('x')
    # Symbolic labels.
    y = T.ivector('y')

    print('... building the model')

    # Reshape the rasterized (batch_size, 28*28) matrix into the 4D tensor
    # the convolution layer expects. The added 1 is the channel count
    # (grayscale images).
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # First convolution with nkerns[0] kernels (20 by default), then pooling.
    layer0 = ConvLayer(rng, input=layer0_input,
                       image_shape=(batch_size, 1, 28, 28),
                       filter_shape=(nkerns[0], 1, 5, 5))
    layer1 = PoolLayer(layer0.output, poolsize=(2, 2))

    # Second convolution with nkerns[1] kernels (50 by default), then pooling.
    layer2 = ConvLayer(rng, input=layer1.output,
                       image_shape=(batch_size, nkerns[0], 12, 12),
                       filter_shape=(nkerns[1], nkerns[0], 5, 5))
    layer3 = PoolLayer(layer2.output, poolsize=(2, 2))

    # The pooled output is a 4D tensor and cannot feed the fully connected
    # layer directly; flatten it to (batch_size, nkerns[1] * 4 * 4).
    layer4_input = layer3.output.flatten(2)

    # Fully connected hidden layer; n_in matches the flattened vector size.
    layer4 = HiddenLayer(rng, input=layer4_input, n_in=nkerns[1] * 4 * 4,
                         n_out=n_hidden, activation=T.tanh)

    # Output layer fed by the n_hidden hidden units.
    layer5 = LogisticRegression(input=layer4.output, n_in=n_hidden, n_out=10)

    # Cost without explicit regularization terms (original author's note:
    # the CNN structure itself acts as a regularizer).
    cost = layer5.negative_log_likelihood(y)

    # Test model: error rate of layer5 on the minibatch selected by `index`.
    test_model = theano.function(
        [index],
        layer5.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        })

    # Validation model.
    validate_model = theano.function(
        [index],
        layer5.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        })

    # Parameters to differentiate (the pooling layers contribute none).
    params = layer5.params + layer4.params + layer2.params + layer0.params

    # Gradient of the cost with respect to each parameter.
    grads = T.grad(cost, params)

    # Parameter updates.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    # Training model.
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        })

    # optimize
    print("train model ...")
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    # Context managers guarantee both logs are flushed and closed even if
    # training raises part-way through.
    with open('log/lenet_validation_error.txt', 'w') as fp1, \
            open('log/lenet_test_error.txt', 'w') as fp2:
        while (epoch < n_epochs) and (not done_looping):
            epoch = epoch + 1
            for minibatch_index in xrange(n_train_batches):
                train_model(minibatch_index)
                # Global count of minibatches processed before this one.
                iteration = (epoch - 1) * n_train_batches + minibatch_index

                if (iteration + 1) % validation_frequency == 0:
                    # Mean error rate over all validation minibatches.
                    validation_losses = [validate_model(i)
                                         for i in xrange(n_valid_batches)]
                    this_validation_loss = np.mean(validation_losses)
                    print('epoch %i, minibatch %i/%i, validation error %f ' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           this_validation_loss * 100.))
                    fp1.write("%d\t%f\n" % (epoch, this_validation_loss * 100))

                    if this_validation_loss < best_validation_loss:
                        # Extend patience only for a significant improvement.
                        if (this_validation_loss <
                                best_validation_loss * improvement_threshold):
                            patience = max(patience,
                                           iteration * patience_increase)
                        best_validation_loss = this_validation_loss
                        best_iter = iteration

                        # Mean error rate over all test minibatches.
                        test_losses = [test_model(i)
                                       for i in xrange(n_test_batches)]
                        test_score = np.mean(test_losses)
                        print('epoch %i, minibatch %i/%i, test error %f ' %
                              (epoch, minibatch_index + 1, n_train_batches,
                               test_score * 100.))
                        fp2.write("%d\t%f\n" % (epoch, test_score * 100))

                if patience <= iteration:
                    done_looping = True
                    break

    end_time = timeit.default_timer()
    print(('optimization complete. Best validation score of %f obtained at iteration %i, with test performance %f')
          % (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('This code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    import cPickle

    # Persist the layers that own parameters; each file is closed as soon
    # as its dump finishes.
    trained_layers = (("model/cnn_layer0.pkl", layer0),
                      ("model/cnn_layer2.pkl", layer2),
                      ("model/cnn_layer4.pkl", layer4),
                      ("model/cnn_layer5.pkl", layer5))
    for path, layer in trained_layers:
        with open(path, "wb") as model_file:
            cPickle.dump(layer, model_file)
def test_mlp(learning_rate=0.01,
             L1_reg=0.00,
             L2_reg=0.0001,
             n_epochs=1000,
             dataset='data/mnist.pkl.gz',
             batch_size=20,
             n_hidden=500):
    """Train an ``MLP`` with minibatch gradient descent and early stopping.

    :param learning_rate: step size used in the parameter updates.
    :param L1_reg: weight of ``classifier.L1`` in the cost.
    :param L2_reg: weight of ``classifier.L2_sqr`` in the cost.
    :param n_epochs: maximum number of epochs to run.
    :param dataset: path handed to ``logistic_regression.load_data``.
    :param batch_size: number of examples per minibatch.
    :param n_hidden: number of hidden units of the MLP.

    Progress is printed to stdout, the elapsed time to stderr. Returns
    nothing.
    """
    datasets = logistic_regression.load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Floor division keeps the batch counts integral (xrange needs ints)
    # regardless of whether true division is in effect.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # Symbolic minibatch index.
    index = T.lscalar()
    # Symbolic matrix of input examples.
    x = T.matrix('x')
    # Symbolic 1-D integer vector of labels.
    y = T.ivector('y')

    # Random number generator.
    rng = np.random.RandomState(1234)

    # Build the MLP.
    classifier = MLP(rng=rng,
                     input=x,
                     n_in=28 * 28,
                     n_hidden=n_hidden,
                     n_out=10)

    # Symbolic cost: negative log likelihood plus the regularization terms.
    cost = (classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)

    # Error rate of one test minibatch.
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # Error rate of one validation minibatch.
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # Back propagation: gradient of the cost w.r.t. every parameter.
    gparams = [T.grad(cost, param) for param in classifier.params]

    # One (parameter, new value) pair per parameter; zip walks the
    # parameters and their gradients in lockstep.
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # The training function returns the cost and applies `updates`.
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    print('... training')

    # Early-stopping state.
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            train_model(minibatch_index)
            # Global count of minibatches processed before this one.
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # Mean error rate over all validation minibatches.
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f ' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:
                    # Extend patience only for a significant improvement.
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # Mean error rate over all test minibatches.
                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_score = np.mean(test_losses)
                    print('epoch %i, minibatch %i/%i, test error %f ' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print((
        'optimization complete. Best validation score of %f obtained at iteration %i, with test performance %f'
    ) % (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('This code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
def test(learning_rate = 0.01,l1_reg=0.0,l2_reg=0.0001, n_epoch=1000,batch_size=20,hidden_units=500):
    """Train an ``MLP`` with minibatch gradient descent and early stopping.

    :param learning_rate: step size used in the parameter updates.
    :param l1_reg: weight of ``classifier.L1`` in the cost.
    :param l2_reg: weight of ``classifier.L2`` in the cost.
    :param n_epoch: maximum number of epochs to run.
    :param batch_size: number of examples per minibatch.
    :param hidden_units: number of hidden units of the MLP.

    Progress and a final summary are printed; nothing is returned.
    """
    dataset = load_data()

    train_x, train_y = dataset[0]
    validation_x, validation_y = dataset[1]
    test_x, test_y = dataset[2]

    ## compute the number of minibatches
    n_train_batches = train_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_x.get_value(borrow=True).shape[0] // batch_size
    n_validation_batches = validation_x.get_value(borrow=True).shape[0] // batch_size

    print('building the model....')

    index = T.lscalar()  ## index
    x = T.matrix('x')
    y = T.ivector('y')  ## labels

    random_state = np.random.RandomState(1234)

    classifier = MLP(random_stream = random_state,
                     input = x,
                     n_in = 28 * 28,
                     n_hidden = hidden_units,
                     n_out = 10)

    ## loss function (cost function) plus regularization terms
    cost = (
        classifier.neg_loglikelihood(y)
        + l1_reg * classifier.L1
        + l2_reg * classifier.L2
    )

    test_model = theano.function(
        inputs = [index],
        outputs = classifier.error(y),
        givens = {
            x: test_x[index * batch_size : (index+1) * batch_size],
            y: test_y[index * batch_size : (index+1) * batch_size]
        }
    )

    validation_model = theano.function(
        inputs = [index],
        outputs = classifier.error(y),
        givens = {
            x: validation_x[index * batch_size : (index+1) * batch_size],
            y: validation_y[index * batch_size : (index+1) * batch_size]
        }
    )

    ## gradient descent
    gparams = [T.grad(cost, param) for param in classifier.params]
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    train_model = theano.function(
        inputs = [index],
        outputs = cost,
        updates = updates,
        givens = {
            x: train_x[index * batch_size : (index+1) * batch_size],
            y: train_y[index * batch_size : (index+1) * batch_size]
        }
    )

    print('complete the building model')

    print('training the model....')
    ## early stopping
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    ## number of minibatches between two validation passes
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    best_iteration = 0
    # Initialised up front so the final summary cannot hit an undefined name
    # when no validation pass ever improves (e.g. n_epoch == 0).
    test_score_mean = 0.

    start_time = time.time()

    epoch = 0
    done_looping = False

    while (epoch < n_epoch) and (not done_looping):
        epoch += 1
        for minibatch_index in xrange(n_train_batches):
            train_model(minibatch_index)
            # Global count of minibatches processed before this one.
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                ## mean loss over all validation minibatches
                validation_loss = [validation_model(i)
                                   for i in xrange(n_validation_batches)]
                validation_loss_mean = np.mean(validation_loss)

                print(' %i epoch %i/%i minibatch , validation error %f'
                      % (epoch, minibatch_index + 1, n_train_batches,
                         validation_loss_mean * 100.))

                ## new best validation score: evaluate the test set
                if validation_loss_mean < best_validation_loss:
                    # Extend patience only for a significant improvement.
                    if (validation_loss_mean <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    ## save the best validation score and iteration
                    best_validation_loss = validation_loss_mean
                    best_iteration = iteration

                    ## predict the test set
                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score_mean = np.mean(test_losses)

                    print(' %i epoch , %i/%i minibatch , test score %f '
                          % (epoch, minibatch_index + 1, n_train_batches,
                             test_score_mean * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = time.time()
    print('complete the training model')
    print('Best validation loss %f \n Best iteration %d \n Test Score %f'
          % (best_validation_loss * 100, best_iteration,
             test_score_mean * 100.))
    print('Time is %0.2f' % ((end_time - start_time) / 60))
def test_autoencoder(): learning_rate = 0.1 training_epochs = 30 batch_size = 20 datasets = load_data('data/mnist.pkl.gz') train_set_x = datasets[0][0] # ミニバッチの数(教師データをbatch数で割るだけ) n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size # ミニバッチのindexシンボル index = T.lscalar() # ミニバッチの学習データシンボル x = T.matrix('x') rng = np.random.RandomState(123) theano_rng = RandomStreams(rng.randint(2**30)) # autoencoder モデル da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x, n_visible=28 * 28, n_hidden=500) # コスト関数と更新式のシンボル cost, updates = da.get_cost_updates(corruption_level=0.0, learning_rate=learning_rate) # trainingの関数 train_da = theano.function( [index], cost, updates=updates, givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]}) fp = open("log/ae_cost.txt", "w") # training start_time = time.clock() for epoch in xrange(training_epochs): c = [] for batch_index in xrange(n_train_batches): c.append(train_da(batch_index)) print 'Training epoch %d, cost ' % epoch, np.mean(c) fp.write('%d\t%f\n' % (epoch, np.mean(c))) end_time = time.clock() training_time = (end_time - start_time) fp.close() print "The no corruption code for file " + os.path.split( __file__)[1] + " ran for %.2fm" % ((training_time / 60.0)) image = Image.fromarray( tile_raster_images(X=da.W.get_value(borrow=True).T, img_shape=(28, 28), tile_shape=(10, 10), tile_spacing=(1, 1))) image.save('log/dae_filters_corruption_00.png')
def optimize_lenet(learning_rate=0.01,
                   n_epochs=200,
                   dataset='data/mnist.pkl.gz',
                   batch_size=500,
                   n_hidden=500,
                   nkerns=[20, 50],
                   rng=np.random.RandomState(23455)):
    """Train a LeNet-style CNN built from ``LeNetConvPoolLayer`` blocks.

    :param learning_rate: step size used in the parameter updates.
    :param n_epochs: maximum number of epochs to run.
    :param dataset: path handed to ``load_data``.
    :param batch_size: number of examples per minibatch (also baked into the
        layer shapes).
    :param n_hidden: number of units of the fully connected hidden layer.
    :param nkerns: number of kernels of the first and second conv/pool layer.
        Only indexed, never mutated, inside this function.
    :param rng: random state used to initialise the layers. NOTE: the default
        instance is created once, when the function is defined, so calls that
        rely on the default share (and advance) the same generator.

    Validation and test error rates are logged to
    ``log/lenet_validation_error.txt`` and ``log/lenet_test_error.txt``;
    ``layer0`` and ``layer1`` are pickled into ``model/``. Returns nothing.
    """
    print('... load training set')
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Floor division keeps the batch counts integral (xrange needs ints)
    # regardless of whether true division is in effect.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # Symbolic minibatch index.
    index = T.lscalar()
    # Symbolic data.
    x = T.matrix('x')
    # Symbolic labels.
    y = T.ivector('y')

    print('... building the model')

    # Reshape the rasterized (batch_size, 28*28) matrix into the 4D tensor
    # LeNetConvPoolLayer expects. The added 1 is the channel count
    # (grayscale images).
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # layer0: nkerns[0] kernels (20 by default).
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 1, 28, 28),
                                filter_shape=(nkerns[0], 1, 5, 5),
                                poolsize=(2, 2))

    # layer1: nkerns[1] kernels (50 by default).
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 12, 12),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2))

    # layer1's output is a 4D tensor and cannot feed the fully connected
    # layer directly; flatten it to (batch_size, nkerns[1] * 4 * 4).
    layer2_input = layer1.output.flatten(2)

    # layer2: fully connected hidden layer; n_in matches the flattened
    # vector size.
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * 4 * 4,
                         n_out=n_hidden,
                         activation=T.tanh)

    # layer3: output layer fed by the n_hidden hidden units.
    layer3 = LogisticRegression(input=layer2.output, n_in=n_hidden, n_out=10)

    # Cost without explicit regularization terms (original author's note:
    # the CNN structure itself acts as a regularizer).
    cost = layer3.negative_log_likelihood(y)

    # Test model: error rate of layer3 on the minibatch selected by `index`.
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # Validation model.
    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # Parameters to differentiate.
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # Gradient of the cost with respect to each parameter.
    grads = T.grad(cost, params)

    # Parameter updates.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    # Training model.
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # optimize
    print("train model ...")
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    # Context managers guarantee both logs are flushed and closed even if
    # training raises part-way through.
    with open('log/lenet_validation_error.txt', 'w') as fp1, \
            open('log/lenet_test_error.txt', 'w') as fp2:
        while (epoch < n_epochs) and (not done_looping):
            epoch = epoch + 1
            for minibatch_index in xrange(n_train_batches):
                train_model(minibatch_index)
                # Global count of minibatches processed before this one.
                iteration = (epoch - 1) * n_train_batches + minibatch_index

                if (iteration + 1) % validation_frequency == 0:
                    # Mean error rate over all validation minibatches.
                    validation_losses = [
                        validate_model(i) for i in xrange(n_valid_batches)
                    ]
                    this_validation_loss = np.mean(validation_losses)
                    print('epoch %i, minibatch %i/%i, validation error %f ' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           this_validation_loss * 100.))
                    fp1.write("%d\t%f\n" % (epoch, this_validation_loss * 100))

                    if this_validation_loss < best_validation_loss:
                        # Extend patience only for a significant improvement.
                        if (this_validation_loss <
                                best_validation_loss * improvement_threshold):
                            patience = max(patience,
                                           iteration * patience_increase)
                        best_validation_loss = this_validation_loss
                        best_iter = iteration

                        # Mean error rate over all test minibatches.
                        test_losses = [
                            test_model(i) for i in xrange(n_test_batches)
                        ]
                        test_score = np.mean(test_losses)
                        print('epoch %i, minibatch %i/%i, test error %f ' %
                              (epoch, minibatch_index + 1, n_train_batches,
                               test_score * 100.))
                        fp2.write("%d\t%f\n" % (epoch, test_score * 100))

                if patience <= iteration:
                    done_looping = True
                    break

        end_time = timeit.default_timer()
        print((
            'optimization complete. Best validation score of %f obtained at iteration %i, with test performance %f'
        ) % (best_validation_loss * 100., best_iter + 1, test_score * 100.))
        print >> sys.stderr, ('This code for file ' +
                              os.path.split(__file__)[1] +
                              ' ran for %.2fm' %
                              ((end_time - start_time) / 60.))

    import cPickle

    # Persist the two conv/pool layers; each file is closed as soon as its
    # dump finishes.
    for path, layer in (("model/lenet_layer0.pkl", layer0),
                        ("model/lenet_layer1.pkl", layer1)):
        with open(path, "wb") as model_file:
            cPickle.dump(layer, model_file)
def optimize_stacked_autoencoder(n_ins=28 * 28, hidden_layers_sizes=[1000, 1000, 1000], n_outs=10,
                                 corruption_levels=[0.1, 0.2, 0.3], pretraining_epochs=30,
                                 pretrain_lr=0.001, training_epochs=1000, finetune_lr=0.1,
                                 dataset='data/mnist.pkl.gz', batch_size=1):
    """Pre-train a stacked autoencoder layer by layer, then fine-tune it.

    Progress is printed to stdout.  During fine-tuning the validation error
    and the test error (both multiplied by 100) are written as
    ``epoch<TAB>value`` lines to ``log/SdA_validation_error.txt`` and
    ``log/SdA_test_error.txt``.

    Args:
        n_ins: forwarded to ``SdA`` as ``n_ins``.
        hidden_layers_sizes: forwarded to ``SdA``; must have as many entries
            as ``corruption_levels``.
        n_outs: forwarded to ``SdA`` as ``n_outs``.
        corruption_levels: corruption value handed to the pre-training
            function of each layer (one entry per layer).
        pretraining_epochs: number of pre-training epochs per layer.
        pretrain_lr: learning rate passed to the pre-training functions.
        training_epochs: maximum number of fine-tuning epochs.
        finetune_lr: learning rate passed to ``build_finetune_functions``.
        dataset: path handed to ``load_data``.
        batch_size: mini-batch size.

    Raises:
        AssertionError: if ``hidden_layers_sizes`` and ``corruption_levels``
            differ in length.
    """
    # NOTE: the list defaults above are only read by this function, never
    # mutated, so sharing them between calls is harmless here.
    assert len(hidden_layers_sizes) == len(corruption_levels)

    datasets = load_data(dataset)
    train_set_x, _ = datasets[0]

    # Number of training mini-batches.  Floor division keeps this an integer
    # regardless of the division mode in effect.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    numpy_rng = np.random.RandomState(89677)

    print("building the model ...")
    sda = SdA(numpy_rng=numpy_rng, n_ins=n_ins,
              hidden_layers_sizes=hidden_layers_sizes, n_outs=n_outs)

    print("getting the pretraining functions ...")
    pretraining_functions = sda.pretraining_functions(train_set_x=train_set_x,
                                                      batch_size=batch_size)

    print("pre-training the model ...")
    start_time = timeit.default_timer()
    # Pre-train one layer at a time.
    for layer in xrange(sda.n_layers):
        pretrain_layer = pretraining_functions[layer]
        layer_corruption = corruption_levels[layer]
        for epoch in xrange(pretraining_epochs):
            costs = [pretrain_layer(index=batch_index,
                                    corruption=layer_corruption,
                                    lr=pretrain_lr)
                     for batch_index in xrange(n_train_batches)]
            print("Pre-training layer %i, epoch %d, cost %f" % (layer, epoch, np.mean(costs)))
    end_time = timeit.default_timer()
    training_time = end_time - start_time
    print("The pretraining code for file %s ran for %.2fm" % (
        os.path.split(__file__)[1], training_time / 60.0))

    # Get the functions that fine-tune the pre-trained network.
    print("get the finetuning functions ...")
    # `datasets` and `batch_size` were already dereferenced above, so only
    # `finetune_lr` can still be None at this point.
    if finetune_lr is None:
        print('finetune_lr is None')
    train_model, validate_model, test_model = sda.build_finetune_functions(
        datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr)

    print("fine-tuning the model ...")
    # Early-stopping state: look at least `patience` mini-batches; every
    # significant validation improvement may extend that budget.
    patience = 10 * n_train_batches
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    # The context manager closes both logs even when a training call raises.
    with open('log/SdA_validation_error.txt', 'w') as validation_log, \
            open('log/SdA_test_error.txt', 'w') as test_log:
        while (epoch < training_epochs) and (not done_looping):
            epoch = epoch + 1
            for minibatch_index in xrange(n_train_batches):
                train_model(minibatch_index)
                iteration = (epoch - 1) * n_train_batches + minibatch_index

                if (iteration + 1) % validation_frequency == 0:
                    # validate_model() yields the validation losses; their
                    # mean is the score for this evaluation.
                    this_validation_loss = np.mean(validate_model())
                    print('epoch %i, minibatch %i/%i, validation error %f ' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           this_validation_loss * 100.))
                    validation_log.write("%d\t%f\n" % (epoch, this_validation_loss * 100))

                    if this_validation_loss < best_validation_loss:
                        # Only a significant improvement extends patience.
                        if this_validation_loss < best_validation_loss * improvement_threshold:
                            patience = max(patience, iteration * patience_increase)
                        best_validation_loss = this_validation_loss
                        best_iter = iteration

                        # Score the new best model on the test set.
                        test_score = np.mean(test_model())
                        print('epoch %i, minibatch %i/%i, test error %f ' %
                              (epoch, minibatch_index + 1, n_train_batches,
                               test_score * 100.))
                        test_log.write("%d\t%f\n" % (epoch, test_score * 100))

                if patience <= iteration:
                    done_looping = True
                    break

    end_time = timeit.default_timer()
    print('optimization complete. Best validation score of %f obtained at iteration %i, '
          'with test performance %f' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    sys.stderr.write('This code for file ' + os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) + '\n')
def train_process(cost_norm_reg_l1=0.00, cost_norm_reg_l2=0.001,
                  learning_rate=0.13,
                  batch_size=500,
                  epochs=1000,
                  validate_frequency=None,
                  dataset=r"mnist.pkl.gz"
                  ):
    """Train an MLP with mini-batch gradient descent and patience-based early stopping.

    Whenever the validation error improves, the test error is measured and
    ``mlp.save()`` is called.

    Args:
        cost_norm_reg_l1: L1 regularisation weight passed to ``get_reg_cost``.
        cost_norm_reg_l2: L2 regularisation weight passed to ``get_reg_cost``.
        learning_rate: step size of the gradient-descent updates.
        batch_size: number of samples per mini-batch.
        epochs: maximum number of passes over the training data.
        validate_frequency: run validation every this many trained
            mini-batches; ``None`` means once per epoch.
        dataset: path handed to ``load_data``.
    """
    # Load the training, validation and test data.
    training_data, validate_data, test_data = load_data(dataset)

    # Batch counts.  borrow=True avoids copying each whole dataset just to
    # read its length.
    n_train_batch = training_data.feature.get_value(borrow=True).shape[0] // batch_size
    n_validate_batch = validate_data.feature.get_value(borrow=True).shape[0] // batch_size
    n_test_batch = test_data.feature.get_value(borrow=True).shape[0] // batch_size
    if validate_frequency is None:
        validate_frequency = n_train_batch

    # ---- compile the train / validate / test functions ----
    x = T.fmatrix('x')
    y = T.ivector('y')
    index = T.lscalar('index')

    # MLP arguments here: 28*28, 500, 10 (presumably n_in, n_hidden, n_out).
    mlp = MLP(x, 28 * 28, 500, 10)
    cost = mlp.get_reg_cost(y, cost_norm_reg_l1, cost_norm_reg_l2)
    gradients = T.grad(cost, mlp.params)
    updates = [(param, param - learning_rate * gradient)
               for (param, gradient) in zip(mlp.params, gradients)]

    def batch_givens(data):
        # Substitute the `index`-th mini-batch of `data` for x and y.
        batch = slice(index * batch_size, (index + 1) * batch_size)
        return [(x, data.feature[batch]), (y, data.label[batch])]

    errors = mlp.Last_layer.get_errors(y)
    train = function(inputs=[index], outputs=[errors],
                     updates=updates, givens=batch_givens(training_data))
    validate = function(inputs=[index], outputs=[errors],
                        givens=batch_givens(validate_data))
    test = function(inputs=[index], outputs=[errors],
                    givens=batch_givens(test_data))

    # ---- training loop ----
    best_error = np.inf
    epoch = 0
    patience = 10000
    patience_increase = 2
    error_significant = 0.01
    stop_training = False
    while epoch < epochs and not stop_training:
        epoch += 1
        # `batch_index` (not `index`) so the Theano symbol is not rebound.
        for batch_index in range(n_train_batch):
            train(batch_index)
            passed_batches = (epoch - 1) * n_train_batch + batch_index + 1
            if passed_batches % validate_frequency == 0:
                # Evaluate on the validation data.
                val_error = np.mean([validate(i) for i in range(n_validate_batch)])
                print("pass validate with validation_error:{} current iteration:{}/{}".format(
                    val_error, passed_batches, min(patience, epochs * n_train_batch)))
                if val_error < best_error:
                    # Only a significant improvement extends patience.
                    if val_error <= best_error * (1 - error_significant):
                        patience = max(patience, passed_batches * patience_increase)
                    best_error = val_error
                    # Evaluate the improved model on the test data and save it.
                    test_error = np.mean([test(i) for i in range(n_test_batch)])
                    print("model improves with test accuray:%{:2}".format(100 * (1 - test_error)))
                    mlp.save()
            if passed_batches > patience:
                stop_training = True
                # Leave the batch loop at once; otherwise the remaining
                # mini-batches of this epoch would still be trained.
                break
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             batch_size=20, n_hidden=500):
    """Train a multilayer perceptron with mini-batch SGD and early stopping.

    Prints validation/test progress, reports the error on the training set
    after training, and finally plots the test error (in percent) of each
    new best model against the epoch in which it was found.

    Args:
        learning_rate: step size of the gradient-descent updates.
        L1_reg: weight of the L1 term (``classifier.L1``) in the cost.
        L2_reg: weight of the squared L2 term (``classifier.L2_sqr``) in the cost.
        n_epochs: maximum number of epochs to run the optimizer.
        batch_size: number of samples per mini-batch.
        n_hidden: passed to ``MLP`` as ``n_hidden``.
    """
    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Floor division keeps the batch counts integers regardless of the
    # division mode in effect.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    print('... building the model')

    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    rng = np.random.RandomState(1234)

    classifier = MLP(
        rng=rng,
        input=x,
        n_in=28 * 28,
        n_hidden=n_hidden,
        n_out=10
    )

    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    def error_function(set_x, set_y):
        # Compile a function returning classifier.errors(y) on mini-batch `index`.
        return theano.function(
            inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: set_x[index * batch_size:(index + 1) * batch_size],
                y: set_y[index * batch_size:(index + 1) * batch_size]
            }
        )

    test_model = error_function(test_set_x, test_set_y)
    in_sample_test_model = error_function(train_set_x, train_set_y)
    validate_model = error_function(valid_set_x, valid_set_y)

    gparams = [T.grad(cost, param) for param in classifier.params]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    print('... training')

    # Early-stopping state: look at least `patience` mini-batches; every
    # significant validation improvement may extend that budget.
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    # Points of the final plot: epoch vs. test error (%) of each new best model.
    x_axis = []
    y_axis = []

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            train_model(minibatch_index)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                this_validation_loss = np.mean(
                    [validate_model(i) for i in xrange(n_valid_batches)])
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:
                    # Only a significant improvement extends patience.
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # Score the new best model on the test set.
                    test_score = np.mean(
                        [test_model(i) for i in xrange(n_test_batches)])
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))
                    x_axis.append(epoch)
                    y_axis.append(test_score * 100.)

            if patience <= iteration:
                done_looping = True
                break

    # Error on the training set itself, measured once after training ends
    # (i.e. with the final weights).
    in_sample_score = np.mean(
        [in_sample_test_model(i) for i in xrange(n_train_batches)])
    print('##in sample test error of %f %%' % (in_sample_score * 100.))

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    sys.stderr.write('The code for file ' + os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) + '\n')

    plt.plot(np.asarray(x_axis), np.asarray(y_axis))
    plt.show()