Пример #1
0
def test_autoencoder():
    """Train a ``dA`` autoencoder with no input corruption and log the results.

    Loads ``data/mnist.pkl.gz`` through ``load_data``, builds a ``dA`` with
    28*28 visible and 500 hidden units, and runs 30 epochs of minibatch
    training (batch size 20, learning rate 0.1).  The mean cost of every
    epoch is printed and appended to ``log/ae_cost.txt``; afterwards a tiled
    image built from ``da.W`` is saved to
    ``log/dae_filters_corruption_00.png``.

    :returns: None
    """
    learning_rate = 0.1
    training_epochs = 30
    batch_size = 20

    datasets = load_data('data/mnist.pkl.gz')
    train_set_x = datasets[0][0]

    # Number of complete minibatches.  Floor division keeps the count an
    # integer (xrange requires one) even when true division is in effect.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    index = T.lscalar()  # symbolic minibatch index
    x = T.matrix('x')    # symbolic minibatch of training data

    rng = np.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    # Autoencoder model.
    da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
            n_visible=28 * 28, n_hidden=500)

    # Symbolic cost and parameter updates (corruption disabled).
    cost, updates = da.get_cost_updates(corruption_level=0.0,
                                        learning_rate=learning_rate)

    # Compiled training step: takes a minibatch index, returns its cost.
    train_da = theano.function(
        [index], cost, updates=updates,
        givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})

    start_time = time.clock()
    # The context manager closes the log even if a training step raises.
    with open("log/ae_cost.txt", "w") as fp:
        for epoch in xrange(training_epochs):
            batch_costs = [train_da(batch_index)
                           for batch_index in xrange(n_train_batches)]
            mean_cost = np.mean(batch_costs)
            print('Training epoch %d, cost  %s' % (epoch, mean_cost))
            fp.write('%d\t%f\n' % (epoch, mean_cost))
        end_time = time.clock()

    training_time = end_time - start_time

    print("The no corruption code for file " + os.path.split(__file__)[1] +
          " ran for %.2fm" % (training_time / 60.0))

    # Render the learned weights as a 10x10 grid of 28x28 tiles.
    image = Image.fromarray(tile_raster_images(
        X=da.W.get_value(borrow=True).T,
        img_shape=(28, 28), tile_shape=(10, 10),
        tile_spacing=(1, 1)))
    image.save('log/dae_filters_corruption_00.png')
Пример #2
0
def nn_stochastic_gradient_descent(dataset=r'..\data\mnist.pkl.gz', n_epochs=100, alpha=0.01):
    """Train an ``NN`` one sample at a time and print per-epoch error rates.

    :param dataset: path handed to ``load_data``; the result is unpacked into
        training, validation and test sets.
    :param n_epochs: number of passes over the training set.
    :param alpha: value passed to ``nn.update_weights`` after every sample.
    :returns: None.  One tab-separated line per epoch is printed with the
        epoch number and the training/validation/test errors times 100.
    """
    train_set, valid_set, test_set = load_data(dataset)
    # Initialize neural network.
    nn = NN(numpy.random, 28 * 28, 100, 10)
    # Print header.  The string is not %-formatted, so a single '%' is the
    # literal percent sign here.
    print('Epoch\tTrainingError%\tValidationError%\tTestError%')

    x, y = train_set
    n_samples, n_features = x.shape[0], x.shape[1]

    # Train network for limited number of epochs.
    for epoch in xrange(n_epochs):
        for i in xrange(n_samples):
            # Present each sample as a column vector.
            sample = x[i].reshape(n_features, 1)
            nn.forward(sample)
            nn.backward(y[i])
            nn.update_weights(alpha)
        # Measure accuracy on all data sets; nn.test returns a pair of
        # which only the first element (the error) is reported.
        train_error, _ = nn.test(train_set)
        valid_error, _ = nn.test(valid_set)
        test_error, _ = nn.test(test_set)
        print('%d\t%f\t%f\t%f' % (epoch, 100 * train_error,
                                  100 * valid_error, 100 * test_error))
Пример #3
0
import plot
import logistic_regression as lr

# Load the data.
data = lr.load_data('iris_data.csv')

# Scatter plot of the data for the two listed classes.
plot.scatter_plot(data, ['Iris-setosa', 'Iris-versicolor'])

# Split into data and labels, then into training and testing sets.
X, y = lr.split(data)
X_train, X_test, y_train, y_test = lr.train_test_split(X, y)

# Run SGD on the training set to calculate theta.
theta = lr.SGD(X_train, y_train)
print('\nCalculated theta:\n {}'.format(theta))

# Test the model on the held-out set (the result of lr.accuracy is not used
# here).
hypothesis = lr.predict(X_test, theta)
lr.accuracy(hypothesis, y_test)

# Plot the decision boundary.
plot.boundary(data, ['Iris-setosa', 'Iris-versicolor'], theta)
Пример #4
0
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000, dataset='data/mnist.pkl.gz', batch_size=20, n_hidden=500):
    """Train an ``MLP`` with minibatch gradient descent and early stopping.

    The data is loaded with ``logistic_regression.load_data``; its first
    three entries are used as the training, validation and test splits.
    Progress goes to stdout, the total run time to stderr.

    :param learning_rate: step size of the gradient-descent update.
    :param L1_reg: weight of ``classifier.L1`` in the cost.
    :param L2_reg: weight of ``classifier.L2_sqr`` in the cost.
    :param n_epochs: maximum number of passes over the training set.
    :param dataset: path handed to ``load_data``.
    :param batch_size: number of examples per minibatch.
    :param n_hidden: hidden-layer size passed to ``MLP``.
    :returns: None
    """
    datasets = logistic_regression.load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Complete minibatches per split.  Floor division keeps the counts
    # integral (xrange requires that) even under true division.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    index = T.lscalar()  # minibatch index
    x = T.matrix('x')    # input examples
    y = T.ivector('y')   # 1-D vector of int labels

    rng = np.random.RandomState(1234)

    # Build the MLP.
    classifier = MLP(rng=rng, input=x, n_in=28 * 28, n_hidden=n_hidden,
                     n_out=10)

    # Symbolic cost: negative log-likelihood plus regularisation terms.
    cost = (classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)

    # Error of one minibatch of the test split.
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # Error of one minibatch of the validation split.
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # Back-propagation: gradient of the cost w.r.t. every parameter.
    gparams = [T.grad(cost, param) for param in classifier.params]

    # One (parameter, new value) update pair per parameter.
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # Training step: returns the cost and applies `updates`.
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    print('... training')
    # Early stopping: training ends once the iteration counter reaches
    # `patience`; a sufficiently large validation improvement raises
    # `patience` to at least iteration * patience_increase.
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            train_model(minibatch_index)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # Mean error over all validation minibatches.
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f ' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # Mean error over all test minibatches for the new best.
                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = np.mean(test_losses)
                    print('epoch %i, minibatch %i/%i, test error %f ' % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('optimization complete. Best validation score of %f obtained at iteration %i, with test performance %f') % (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    sys.stderr.write('This code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.) + '\n')
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=20, n_hidden=500):
    """
    Demonstrate stochastic gradient descent optimization for a multilayer
    perceptron, then dump the learned parameters to text files.

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
        gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
        regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
        regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
        http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    :type batch_size: int
    :param batch_size: number of examples per minibatch

    :type n_hidden: int
    :param n_hidden: hidden-layer size passed to ``MLP``

    :returns: None.  Writes ``hidden_weights.txt``, ``hidden_biases.txt``,
        ``output_weights.txt`` and ``output_biases.txt`` to the working
        directory.
    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing;
    # floor division keeps the counts integral (xrange requires that) even
    # under true division
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    classifier = MLP(
        rng=rng,
        input=x,
        n_in=28 * 28,
        n_hidden=n_hidden,
        n_out=10
    )

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients are kept in the list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost and,
    # at the same time, updates the parameters of the model based on the
    # rules defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 10000  # run at least this many iterations
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # go through this many minibatches before checking the network on the
    # validation set
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            train_model(minibatch_index)
            # iteration number
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # mean of classifier.errors over the validation minibatches
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience,
                                       iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [test_model(i) for i
                                   in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    sys.stderr.write('The code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')

    # Dump the learned parameters of both layers as plain text.
    numpy.savetxt('hidden_weights.txt', classifier.hiddenLayer.W.get_value())
    numpy.savetxt('hidden_biases.txt', classifier.hiddenLayer.b.get_value())
    numpy.savetxt('output_weights.txt', classifier.logRegressionLayer.W.get_value())
    numpy.savetxt('output_biases.txt', classifier.logRegressionLayer.b.get_value())
def get_data(dataset):
    """Write the third split returned by ``load_data(dataset)`` to text files.

    The split is unpacked into inputs and labels.  The inputs are saved to
    ``test_set_x.txt`` (obtained with ``get_value()``) and the labels to
    ``test_set_y.txt`` (obtained with ``eval()``), both in the current
    working directory.

    :param dataset: path handed to ``load_data``.
    :returns: None
    """
    test_inputs, test_labels = load_data(dataset)[2]
    numpy.savetxt('test_set_x.txt', test_inputs.get_value())
    numpy.savetxt('test_set_y.txt', test_labels.eval())
Пример #7
0
def optimize_cnn_lenet(learning_rate=0.01, n_epochs=200, dataset='data/mnist.pkl.gz', batch_size=500, n_hidden=500, nkerns=[20, 50], rng=np.random.RandomState(23455)):
    """Train a two-stage conv/pool network with early stopping and pickle it.

    The network is ConvLayer -> PoolLayer -> ConvLayer -> PoolLayer ->
    HiddenLayer (tanh) -> LogisticRegression.  Validation and test errors
    are logged to ``log/lenet_validation_error.txt`` and
    ``log/lenet_test_error.txt``; the four parameterised layers are pickled
    to ``model/cnn_layer{0,2,4,5}.pkl`` after training.

    :param learning_rate: step size of the gradient-descent update.
    :param n_epochs: maximum number of passes over the training set.
    :param dataset: path handed to ``load_data``.
    :param batch_size: number of examples per minibatch; also baked into the
        layer input shapes.
    :param n_hidden: number of units of the fully connected hidden layer.
    :param nkerns: number of filters of the first and second ConvLayer.
    :param rng: random generator passed to the layers.  NOTE: the default
        instance is created once, when the function is defined, so calls
        relying on the default share that single generator.
    :returns: None
    """
    print('... load training set')
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Complete minibatches per split.  Floor division keeps the counts
    # integral (xrange requires that) even under true division.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    index = T.lscalar()  # minibatch index
    x = T.matrix('x')    # data
    y = T.ivector('y')   # labels

    print('... building the model')
    # Reshape the rasterised minibatch into a 4D tensor
    # (batch_size, channels, 28, 28); the single channel is the added 1.
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # First convolution: nkerns[0] filters of size 5x5.
    layer0 = ConvLayer(rng, input=layer0_input, image_shape=(batch_size, 1, 28, 28), filter_shape=(nkerns[0], 1, 5, 5))

    layer1 = PoolLayer(layer0.output, poolsize=(2, 2))

    # Second convolution: nkerns[1] filters of size 5x5 over 12x12 maps.
    # NOTE(review): 12x12 presumably comes from (28 - 5 + 1) / 2 -- confirm
    # against ConvLayer/PoolLayer.
    layer2 = ConvLayer(rng, input=layer1.output, image_shape=(batch_size, nkerns[0], 12, 12), filter_shape=(nkerns[1], nkerns[0], 5, 5))

    layer3 = PoolLayer(layer2.output, poolsize=(2, 2))

    # Flatten the 4D pooled output to a matrix so the fully connected layer
    # can consume it; n_in below assumes 4x4 maps per filter.
    layer4_input = layer3.output.flatten(2)

    # Fully connected hidden layer with n_hidden tanh units.
    layer4 = HiddenLayer(rng, input=layer4_input, n_in=nkerns[1] * 4 * 4, n_out=n_hidden, activation=T.tanh)

    # Output layer: n_hidden inputs, 10 outputs.
    layer5 = LogisticRegression(input=layer4.output, n_in=n_hidden, n_out=10)

    # Cost: negative log-likelihood only, no explicit regularisation term.
    cost = layer5.negative_log_likelihood(y)

    # layer5.errors on one minibatch of the test / validation split.
    test_model = theano.function([index], layer5.errors(y), givens={x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size]})
    validate_model = theano.function([index], layer5.errors(y), givens={x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

    # Trainable parameters (the pooling layers contribute none here).
    params = layer5.params + layer4.params + layer2.params + layer0.params

    # Gradient of the cost w.r.t. every parameter.
    grads = T.grad(cost, params)

    # One (parameter, new value) update pair per parameter.
    updates = [(param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads)]

    # Training step: returns the cost and applies `updates`.
    train_model = theano.function(inputs=[index], outputs=cost, updates=updates, givens={x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size]})

    print("train model ...")
    # Early stopping: training ends once the iteration counter reaches
    # `patience`; a sufficiently large validation improvement raises
    # `patience` to at least iteration * patience_increase.
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    # Context managers close both logs even if a training step raises.
    with open('log/lenet_validation_error.txt', 'w') as fp1, \
            open('log/lenet_test_error.txt', 'w') as fp2:
        while (epoch < n_epochs) and (not done_looping):
            epoch = epoch + 1
            for minibatch_index in xrange(n_train_batches):
                train_model(minibatch_index)
                iteration = (epoch - 1) * n_train_batches + minibatch_index

                if (iteration + 1) % validation_frequency == 0:
                    # Mean error over all validation minibatches.
                    validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                    this_validation_loss = np.mean(validation_losses)
                    print('epoch %i, minibatch %i/%i, validation error %f ' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.))
                    fp1.write("%d\t%f\n" % (epoch, this_validation_loss * 100))

                    if this_validation_loss < best_validation_loss:
                        if this_validation_loss < best_validation_loss * improvement_threshold:
                            patience = max(patience, iteration * patience_increase)

                        best_validation_loss = this_validation_loss
                        best_iter = iteration

                        # Mean error over all test minibatches.
                        test_losses = [test_model(i) for i in xrange(n_test_batches)]
                        test_score = np.mean(test_losses)
                        print('epoch %i, minibatch %i/%i, test error %f ' % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.))
                        fp2.write("%d\t%f\n" % (epoch, test_score * 100))

                if patience <= iteration:
                    done_looping = True
                    break

    end_time = timeit.default_timer()
    print(('optimization complete. Best validation score of %f obtained at iteration %i, with test performance %f') % (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    sys.stderr.write('This code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.) + '\n')

    import cPickle
    # Persist every layer that owns parameters; each file is closed as soon
    # as its dump completes.
    for layer, path in ((layer0, "model/cnn_layer0.pkl"),
                        (layer2, "model/cnn_layer2.pkl"),
                        (layer4, "model/cnn_layer4.pkl"),
                        (layer5, "model/cnn_layer5.pkl")):
        with open(path, "wb") as model_file:
            cPickle.dump(layer, model_file)
Пример #8
0
def test_mlp(learning_rate=0.01,
             L1_reg=0.00,
             L2_reg=0.0001,
             n_epochs=1000,
             dataset='data/mnist.pkl.gz',
             batch_size=20,
             n_hidden=500):
    """Train an ``MLP`` with minibatch gradient descent and early stopping.

    The data is loaded with ``logistic_regression.load_data``; its first
    three entries are used as the training, validation and test splits.
    Progress goes to stdout, the total run time to stderr.

    :param learning_rate: step size of the gradient-descent update.
    :param L1_reg: weight of ``classifier.L1`` in the cost.
    :param L2_reg: weight of ``classifier.L2_sqr`` in the cost.
    :param n_epochs: maximum number of passes over the training set.
    :param dataset: path handed to ``load_data``.
    :param batch_size: number of examples per minibatch.
    :param n_hidden: hidden-layer size passed to ``MLP``.
    :returns: None
    """
    datasets = logistic_regression.load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Complete minibatches per split.  Floor division keeps the counts
    # integral (xrange requires that) even under true division.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # Minibatch index.
    index = T.lscalar()
    # Input examples.
    x = T.matrix('x')
    # 1-D vector of int labels.
    y = T.ivector('y')

    rng = np.random.RandomState(1234)

    # Build the MLP.
    classifier = MLP(rng=rng,
                     input=x,
                     n_in=28 * 28,
                     n_hidden=n_hidden,
                     n_out=10)

    # Symbolic cost: negative log-likelihood plus regularisation terms.
    cost = classifier.negative_log_likelihood(
        y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr

    # Error of one minibatch of the test split.
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # Error of one minibatch of the validation split.
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # Back-propagation: gradient of the cost w.r.t. every parameter.
    gparams = [T.grad(cost, param) for param in classifier.params]

    # One (parameter, new value) update pair per parameter.
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # Training step: returns the cost and applies `updates`.
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    print('... training')
    # Early stopping: training ends once the iteration counter reaches
    # `patience`; a sufficiently large validation improvement raises
    # `patience` to at least iteration * patience_increase.
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            train_model(minibatch_index)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # Mean error over all validation minibatches.
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f ' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # Mean error over all test minibatches for the new best.
                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_score = np.mean(test_losses)
                    print('epoch %i, minibatch %i/%i, test error %f ' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print((
        'optimization complete. Best validation score of %f obtained at iteration %i, with test performance %f'
    ) % (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    sys.stderr.write('This code for file ' + os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')
def test(learning_rate = 0.01,l1_reg=0.0,l2_reg=0.0001,
         n_epoch=1000,batch_size=20,hidden_units=500):
    """Train an ``MLP`` with minibatch gradient descent and early stopping.

    The data comes from ``load_data()`` (called without arguments); its first
    three entries are used as training, validation and test splits.
    Progress and a final summary are printed to stdout.

    :param learning_rate: step size of the gradient-descent update.
    :param l1_reg: weight of ``classifier.L1`` in the cost.
    :param l2_reg: weight of ``classifier.L2`` in the cost.
    :param n_epoch: maximum number of passes over the training set.
    :param batch_size: number of examples per minibatch.
    :param hidden_units: hidden-layer size passed to ``MLP``.
    :returns: None
    """
    dataset = load_data()
    train_x, train_y = dataset[0]
    validation_x, validation_y = dataset[1]
    test_x, test_y = dataset[2]

    ## compute the number of minibatches
    n_train_batches = train_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_x.get_value(borrow=True).shape[0] // batch_size
    n_validation_batches = validation_x.get_value(borrow=True).shape[0] // batch_size

    print('building the model....')
    index = T.lscalar()  ## index
    x = T.matrix('x')
    y = T.ivector('y')  ## labels
    random_state = np.random.RandomState(1234)

    classifier = MLP(random_stream=random_state,
                     input=x,
                     n_in=28 * 28,
                     n_hidden=hidden_units,
                     n_out=10)

    ## loss function (cost function) plus the two regularization terms
    cost = (
        classifier.neg_loglikelihood(y)
        + l1_reg * classifier.L1
        + l2_reg * classifier.L2
    )

    test_model = theano.function(
        inputs=[index],
        outputs=classifier.error(y),
        givens={
            x: test_x[index * batch_size:(index + 1) * batch_size],
            y: test_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validation_model = theano.function(
        inputs=[index],
        outputs=classifier.error(y),
        givens={
            x: validation_x[index * batch_size:(index + 1) * batch_size],
            y: validation_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    ## gradient descent: one (parameter, new value) pair per parameter
    gradients = [T.grad(cost, param) for param in classifier.params]
    updates = [(param, param - learning_rate * gradient)
               for param, gradient in zip(classifier.params, gradients)]

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_x[index * batch_size:(index + 1) * batch_size],
            y: train_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    print('complete the building model')
    print('training the model....')

    ## early stopping: training ends once the iteration counter reaches
    ## `patience`; a sufficiently large validation improvement raises it
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    ## number of minibatches between two validation runs
    validation_frequency = min(n_train_batches, patience // 2)
    best_validation_loss = np.inf
    best_iteration = 0
    # Initialised up front so the final summary cannot hit an undefined name
    # when no validation improvement is ever recorded (e.g. n_epoch=0).
    test_score_mean = 0.
    start_time = time.time()
    epoch = 0
    done_looping = False

    while (epoch < n_epoch) and (not done_looping):
        epoch += 1
        for minibatch_index in xrange(n_train_batches):
            train_model(minibatch_index)
            iteration = (epoch - 1) * n_train_batches + minibatch_index
            if (iteration + 1) % validation_frequency == 0:
                ## mean error over all validation minibatches
                validation_losses = [validation_model(i)
                                     for i in xrange(n_validation_batches)]
                validation_loss_mean = np.mean(validation_losses)
                print(' %i epoch %i/%i minibatch , validation error %f' % (
                    epoch,
                    minibatch_index + 1,
                    n_train_batches,
                    validation_loss_mean * 100.))
                ## new best validation score: evaluate the test set as well
                if validation_loss_mean < best_validation_loss:
                    if validation_loss_mean < best_validation_loss * improvement_threshold:
                        patience = max(patience, iteration * patience_increase)
                    ## save the best validation score and iteration
                    best_validation_loss = validation_loss_mean
                    best_iteration = iteration

                    ## mean error over all test minibatches
                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score_mean = np.mean(test_losses)

                    print(' %i epoch , %i/%i minibatch , test score %f ' % (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        test_score_mean * 100.))
            if patience <= iteration:
                done_looping = True
                break

    end_time = time.time()
    print('complete the training model')
    print('Best validation loss %f \n Best iteration %d \n Test Score %f' % (
        best_validation_loss * 100,
        best_iteration,
        test_score_mean * 100.))
    print('Time is %0.2f' % ((end_time - start_time) / 60))
Пример #10
0
def test_autoencoder():
    learning_rate = 0.1
    training_epochs = 30
    batch_size = 20

    datasets = load_data('data/mnist.pkl.gz')

    train_set_x = datasets[0][0]

    # ミニバッチの数(教師データをbatch数で割るだけ)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # ミニバッチのindexシンボル
    index = T.lscalar()

    # ミニバッチの学習データシンボル
    x = T.matrix('x')

    rng = np.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2**30))

    # autoencoder モデル
    da = dA(numpy_rng=rng,
            theano_rng=theano_rng,
            input=x,
            n_visible=28 * 28,
            n_hidden=500)

    # コスト関数と更新式のシンボル
    cost, updates = da.get_cost_updates(corruption_level=0.0,
                                        learning_rate=learning_rate)

    # trainingの関数
    train_da = theano.function(
        [index],
        cost,
        updates=updates,
        givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})

    fp = open("log/ae_cost.txt", "w")

    # training
    start_time = time.clock()
    for epoch in xrange(training_epochs):
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))
        print 'Training epoch %d, cost ' % epoch, np.mean(c)
        fp.write('%d\t%f\n' % (epoch, np.mean(c)))

    end_time = time.clock()

    training_time = (end_time - start_time)

    fp.close()

    print "The no corruption code for file " + os.path.split(
        __file__)[1] + " ran for %.2fm" % ((training_time / 60.0))

    image = Image.fromarray(
        tile_raster_images(X=da.W.get_value(borrow=True).T,
                           img_shape=(28, 28),
                           tile_shape=(10, 10),
                           tile_spacing=(1, 1)))
    image.save('log/dae_filters_corruption_00.png')
Пример #11
0
def optimize_lenet(learning_rate=0.01,
                   n_epochs=200,
                   dataset='data/mnist.pkl.gz',
                   batch_size=500,
                   n_hidden=500,
                   nkerns=[20, 50],
                   rng=np.random.RandomState(23455)):
    print '... load training set'
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    # ミニバッチのindex
    index = T.lscalar()

    # dataシンボル
    x = T.matrix('x')
    # labelシンボル
    y = T.ivector('y')

    print '... building the model'
    # LeNetConvPoolLayerと矛盾が起きないように、(batch_size, 28*28)にラスタ化された行列を4DTensorにリシェイプする
    # 追加した1はチャンネル数
    # ここではグレイスケール画像なのでチャンネル数は1
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # layer0
    # filterのnkerns[0]は20
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 1, 28, 28),
                                filter_shape=(nkerns[0], 1, 5, 5),
                                poolsize=(2, 2))

    # layer1
    # filterのnkerns[1]は50
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 12, 12),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2))

    # layer2_input
    # layer1の出力は4x4ピクセルの画像が50チャンネル分4次元Tensorで出力されるが、多層パーセプトロンの入力にそのまま使えない
    # 4x4x50=800次元のベクトルに変換する(batch_size, 50, 4, 4)から(batch_size, 800)にする
    layer2_input = layer1.output.flatten(2)

    # layer2
    # 500ユニットの隠れレイヤー
    # layer2_inputで作成した入力ベクトルのサイズ=n_in
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * 4 * 4,
                         n_out=n_hidden,
                         activation=T.tanh)

    # layer3
    # 出力は500ユニット
    layer3 = LogisticRegression(input=layer2.output, n_in=n_hidden, n_out=10)

    # cost(普通の多層パーセプトロンは正則化項が必要だが、CNNは構造自体で正則化の効果を含んでいる)
    cost = layer3.negative_log_likelihood(y)

    # testモデル
    # 入力indexからgivensによって計算した値を使ってlayer3.errorsを計算する
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # validationモデル
    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # 微分用のパラメータ
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # コスト関数パラメータについてのの微分
    grads = T.grad(cost, params)

    # パラメータの更新
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    # trainモデル
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # optimize
    print "train model ..."
    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience / 2)

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    fp1 = open('log/lenet_validation_error.txt', 'w')
    fp2 = open('log/lenet_test_error.txt', 'w')

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                ## validationのindexをvalidationのエラー率を計算するfunctionに渡し、配列としてかえす
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                # 平均してscoreにする
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f ' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))
                fp1.write("%d\t%f\n" % (epoch, this_validation_loss * 100))

                if this_validation_loss < best_validation_loss:
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    ## testのindex をtestのエラー率を計算するfunctionに渡し、配列として渡す
                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]

                    ## 平均してscoreにする
                    test_score = np.mean(test_losses)
                    ##
                    print('epoch %i, minibatch %i/%i, test error %f ' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))
                    fp2.write("%d\t%f\n" % (epoch, test_score * 100))
            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print((
        'optimization complete. Best validation score of %f obtained at iteration %i, with test performance %f'
    ) % (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('This code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    fp1.close()
    fp2.close()

    import cPickle
    cPickle.dump(layer0, open("model/lenet_layer0.pkl", "wb"))
    cPickle.dump(layer1, open("model/lenet_layer1.pkl", "wb"))
Пример #12
0
def optimize_stacked_autoencoder(n_ins=28 * 28,
                                 hidden_layers_sizes=[1000, 1000, 1000],
                                 n_outs=10,
                                 corruption_levels=[0.1, 0.2, 0.3],
                                 pretraining_epochs=30,
                                 pretrain_lr=0.001,
                                 training_epochs=1000,
                                 finetune_lr=0.1,
                                 dataset='data/mnist.pkl.gz',
                                 batch_size=1):
    """ 各事前学習のエポック数、事前学習の学習率、finetuneのエポック数、finetuneの学習率、学習データああセット、ミニバッチサイズ"""
    assert len(hidden_layers_sizes) == len(corruption_levels)

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # 教師バッチ数
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    numpy_rng = np.random.RandomState(89677)

    print "building the model ..."

    sda = SdA(numpy_rng=numpy_rng,
              n_ins=n_ins,
              hidden_layers_sizes=hidden_layers_sizes,
              n_outs=n_outs)

    print "getting the pretraining functions ..."

    pretraining_functions = sda.pretraining_functions(train_set_x=train_set_x,
                                                      batch_size=batch_size)

    print "pre-training the model ..."

    start_time = timeit.default_timer()

    # 層ごとにAutoEncode
    for i in xrange(sda.n_layers):
        for epoch in xrange(pretraining_epochs):
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_functions[i](
                    index=batch_index,
                    corruption=corruption_levels[i],
                    lr=pretrain_lr))
            print "Pre-training layer %i, epoch %d, cost %f" % (i, epoch,
                                                                np.mean(c))
    end_time = timeit.default_timer()
    training_time = end_time - start_time
    print "The pretraining code for file %s ran for %.2fm" % (
        os.path.split(__file__)[1], training_time / 60.0)

    # AutoEncodeされたネットワークをfinetuningする関数を取得
    print "get the finetuning functions ..."

    if datasets is None:
        print 'dataset is None'
    if batch_size is None:
        print 'batch_size is None'
    if finetune_lr is None:
        print 'finetune_lr is None'

    train_model, validate_model, test_model = sda.build_finetune_functions(
        datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr)

    print "fine-tuning the model ..."
    patience = 10 * n_train_batches
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience / 2)

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    fp1 = open('log/SdA_validation_error.txt', 'w')
    fp2 = open('log/SdA_test_error.txt', 'w')

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                ## validationのindexをvalidationのエラー率を計算するfunctionに渡し、配列としてかえす
                validation_losses = validate_model()
                # 平均してscoreにする
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f ' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))
                fp1.write("%d\t%f\n" % (epoch, this_validation_loss * 100))

                if this_validation_loss < best_validation_loss:
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    ## testのindex をtestのエラー率を計算するfunctionに渡し、配列として渡す
                    test_losses = test_model()

                    ## 平均してscoreにする
                    test_score = np.mean(test_losses)
                    ##
                    print('epoch %i, minibatch %i/%i, test error %f ' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))
                    fp2.write("%d\t%f\n" % (epoch, test_score * 100))
            if patience <= iter:
                done_looping = True
                break
    fp1.close()
    fp2.close()
    end_time = timeit.default_timer()
    print((
        'optimization complete. Best validation score of %f obtained at iteration %i, with test performance %f'
    ) % (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('This code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
Пример #13
0
def train_process(cost_norm_reg_l1=0.00,
                  cost_norm_reg_l2=0.001,
                  learning_rate=0.13,
                  batch_size = 500,
                  epochs = 1000,
                  validate_frequency = None
                  ):
    """Train an ``MLP`` (784 inputs, 500 hidden units, 10 outputs) with
    minibatch gradient descent and patience-based early stopping.

    Data is read from ``mnist.pkl.gz`` through ``load_data``. Each time the
    validation error improves, the test error is evaluated and the model is
    stored with ``mlp.save()``.

    Parameters:
        cost_norm_reg_l1: weight of the L1-norm regularization term.
        cost_norm_reg_l2: weight of the L2-norm regularization term.
        learning_rate: step size of the gradient-descent updates.
        batch_size: number of samples per minibatch.
        epochs: maximal number of passes over the training data.
        validate_frequency: number of trained minibatches between two
            validation passes; ``None`` means once per epoch.
    """
    #loading training,validate,test data
    training_data,validate_data,test_data = load_data(r"mnist.pkl.gz")
    #train config
    n_train_batch = int(training_data.feature.get_value().shape[0] / batch_size)
    n_validate_batch = int(validate_data.feature.get_value().shape[0] / batch_size)
    n_test_batch = int(test_data.feature.get_value().shape[0] / batch_size)
    if validate_frequency is None:
        validate_frequency = n_train_batch
    #compile: train, validate, test functions
    #compile train function
    x = T.fmatrix('x')
    y = T.ivector('y')
    index  = T.lscalar('index')
    #set n_in = 28*28 n_hidden = 500,n_out = 10,reg_l1 = 0.00 reg_l2 = 0.001
    mlp = MLP(x,28*28,500,10)
    cost = mlp.get_reg_cost(y, cost_norm_reg_l1,cost_norm_reg_l2)
    t_params = T.grad(cost,mlp.params)
    updates = [(param,param-learning_rate*t_param) for (param,t_param) in
              zip(mlp.params,t_params)]
    train = function(inputs = [index],
             outputs = [mlp.Last_layer.get_errors(y)],
             updates = updates,
             givens = [
                       (x,training_data.feature[index*batch_size:(index+1)*batch_size]),
                       (y,training_data.label[index*batch_size:(index+1)*batch_size]),]
             )
    #compile validate function
    validate = function(inputs = [index],
             outputs = [mlp.Last_layer.get_errors(y)],
             givens = [
                       (x,validate_data.feature[index*batch_size:(index+1)*batch_size]),
                       (y,validate_data.label[index*batch_size:(index+1)*batch_size]),]
             )
    #compile test function
    test = function(inputs = [index],
             outputs = [mlp.Last_layer.get_errors(y)],
             givens = [
                       (x,test_data.feature[index*batch_size:(index+1)*batch_size]),
                       (y,test_data.label[index*batch_size:(index+1)*batch_size]),]
             )
    #begin training process
    best_error = np.inf
    epoch  = 0
    #early stopping: train at least `patience` minibatches; a significant
    #improvement raises patience to passed_batches*patience_increase
    patience = 10000
    patience_increase = 2
    error_significant = 0.01
    stop_training = False
    while epoch < epochs and not stop_training:
        epoch += 1
        #use a dedicated loop name so the symbolic `index` is not rebound
        for batch_index in range(n_train_batch):
            train(batch_index)
            passed_batches = (epoch-1)*n_train_batch + batch_index+1
            if passed_batches%validate_frequency==0:
                #pass the validate data
                val_error = np.mean([validate(i) for i in range(n_validate_batch)])
                print("pass validate with validation_error:{}  current iteration:{}/{}".format(
                            val_error,passed_batches,min(patience,epochs*n_train_batch)))
                if val_error < best_error:#when get a better results
                    if val_error <= best_error*(1-error_significant):
                        patience = max(patience,passed_batches*patience_increase)
                    best_error = val_error#update error
                    #pass the test data
                    test_error = np.mean([test(i) for i in range(n_test_batch)])
                    print("model improves with test accuray:%{:2}".format(100*(1-test_error)))
                    mlp.save()
            if passed_batches>patience:
                stop_training = True
                #leave the batch loop right away; without this the rest of
                #the epoch would still be trained after patience ran out
                break
Пример #14
0
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
        batch_size=20, n_hidden=500):
    '''
    Stochastic gradient descent optimization for a multilayer perception

    @learning_rate
    -type : float
    -param : learning rate used

    @L1_reg
    -type : float
    -param : L1-norm's weight when added to the cost

    @L2_reg
    -type : float
    -param : L2-norm's weight when added to the cost

    @n_epochs
    -type : int
    -param : maximal number of epochs to run the optimizer
    '''
    datasets = load_data()
    
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    print '... building the model'

    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    rng = np.random.RandomState(1234)

    classifier = MLP(
            rng=rng,
            input=x,
            n_in=28 * 28,
            n_hidden=n_hidden,
            n_out=10
    )

    cost = (
            classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr
    )

    test_model = theano.function(
            inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: test_set_x[index * batch_size:(index + 1) * batch_size],
                y: test_set_y[index * batch_size:(index + 1) * batch_size]
            }
    )

    in_sample_test_model = theano.function(
            inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]
            }
    )

    validate_model = theano.function(
            inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]
            }
    )

    gparams = [T.grad(cost, param) for param in classifier.params]

    updates = [
            (param, param - learning_rate * gparam)
            for param, gparam in zip(classifier.params, gparams)
    ]

    train_model = theano.function(
            inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]
            }
    )

    print '... training'

    patience = 10000
    patience_increase = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience / 2)

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    x_axis = []
    y_axis = []
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                validation_losses = [validate_model(i) for i
                        in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)

                print(
                        'epoch %i, minibatch %i/%i, validation error %f %%' %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            this_validation_loss * 100.
                        )
                )

                if this_validation_loss < best_validation_loss:
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    test_losses = [test_model(i) for i 
                            in xrange(n_test_batches)]
                    test_score = np.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                        'best model %f %%') %
                        (epoch, minibatch_index + 1, n_train_batches,
                         test_score * 100.))

                    x_axis.append(epoch)
                    y_axis.append(test_score * 100.)

                if patience <= iter:
                    done_looping = True
                    break

    in_sample_losses = [in_sample_test_model(i) for i
            in xrange(n_train_batches)]
    in_sample_score = np.mean(in_sample_losses)
    print('##in sample test error of %f %%' % (in_sample_score * 100.))

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
          'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
            os.path.split(__file__)[1] +
            ' ran for %.2fm' % ((end_time - start_time) / 60.))

    plt.plot(np.asarray(x_axis), np.asarray(y_axis))
    plt.show()