Example 1
def test_gradient_updates_Adam():
    # Find minimum of a parabola
    x = T.matrix('x')
    w = theano.shared(100.0, borrow=True)
    h = T.dot(x, w)
    cost = T.mean(h**2)

    updates = cnn.gradient_updates_Adam(cost, [w], 10)

    f = theano.function([x], cost, updates=updates)
    for i in range(100):
        cost_i = f(np.ones((1, 1)))
    assert cost_i < 0.06
    
    # A second, bumpier cost with local minima; use a smaller learning rate
    x2 = T.matrix('x2')
    w2 = theano.shared(10.0, borrow=True)
    h2 = T.dot(x2, w2)
    cost2 = T.mean(T.sin(h2)**2 + 0.1*h2**2)

    updates2 = cnn.gradient_updates_Adam(cost2, [w2], 0.1)

    f2 = theano.function([x2], cost2, updates=updates2)
    for i in range(200):
        cost_i2 = f2(np.ones((1, 1)))
    assert cost_i2 < 1
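The examples call cnn.gradient_updates_Adam(cost, params, learning_rate) without showing its body. For reference, a minimal sketch of what such a helper typically looks like in Theano is given below; the name, default hyperparameters and details are assumptions, and the project's actual implementation may differ.

import numpy as np
import theano
import theano.tensor as T

def gradient_updates_adam_sketch(cost, params, learning_rate,
                                 beta1=0.9, beta2=0.999, eps=1e-8):
    # Hypothetical sketch; the project's cnn module may differ.
    # Builds the (shared_variable, new_value) list for theano.function(updates=...)
    updates = []
    # Step counter shared across parameters, used for bias correction
    t = theano.shared(np.asarray(0.0, dtype=theano.config.floatX))
    t_new = t + 1.0
    updates.append((t, t_new))
    for param in params:
        grad = T.grad(cost, param)
        # Per-parameter first and second moment estimates
        m = theano.shared(param.get_value() * 0.0)
        v = theano.shared(param.get_value() * 0.0)
        m_new = beta1 * m + (1.0 - beta1) * grad
        v_new = beta2 * v + (1.0 - beta2) * grad ** 2
        # Bias-corrected moments and the Adam parameter step
        m_hat = m_new / (1.0 - beta1 ** t_new)
        v_hat = v_new / (1.0 - beta2 ** t_new)
        updates.append((m, m_new))
        updates.append((v, v_new))
        updates.append((param, param - learning_rate * m_hat / (T.sqrt(v_hat) + eps)))
    return updates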
Example 2
def test_fullyConnectedLayer():
    '''
    Test that the fully connected layer works. Trains a fully connected
    network with one hidden layer of 4 units to fit the sine function.
    For visualization see test.py.
    NOTE: Activations are applied outside the FC layer, since a linear
          activation is used for the atomic calculations.
    '''
    pi = 3.14159265358

    xtrain = np.linspace(0, 7, 300)
    ytrain = np.sin(xtrain)

    # Column-vector versions for the Theano graph
    Xtrain = xtrain.reshape(300, 1)
    Ytrain = np.sin(Xtrain)


    rng = np.random.RandomState(23455)

    x=T.matrix('x')
    y=T.matrix('y')

    [hout, params_1] = cnn.fullyConnectedLayer(
        rng=rng,
        data_input=x,
        num_in=1,
        num_out=4)

    [y_pred_lin, params_2] = cnn.fullyConnectedLayer(
        rng=rng,
        data_input=T.tanh(hout),
        num_in=4,
        num_out=1)
    y_pred=T.tanh(y_pred_lin)

    cost = cnn.MSE(y, y_pred)

    params = params_1 + params_2

    updates = cnn.gradient_updates_Adam(cost, params, 0.05)

    train = theano.function(
        inputs=[x,y],
        outputs=[cost],
        updates=updates)
    
    for i in range(2000):
        cost_i = train(Xtrain, Ytrain)
    assert cost_i[0] < 0.015
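cnn.fullyConnectedLayer and cnn.MSE are used above but their bodies are not shown. A minimal sketch of what they might look like, assuming the layer returns its linear output together with its parameter list (illustrative names and initialisation; the real module may differ):

import numpy as np
import theano
import theano.tensor as T

def fully_connected_layer_sketch(rng, data_input, num_in, num_out):
    # Hypothetical sketch of a fully connected layer.
    # Small random weights and zero biases, stored as shared variables.
    W_values = np.asarray(rng.uniform(low=-1.0 / np.sqrt(num_in),
                                      high=1.0 / np.sqrt(num_in),
                                      size=(num_in, num_out)),
                          dtype=theano.config.floatX)
    W = theano.shared(W_values, borrow=True)
    b = theano.shared(np.zeros((num_out,), dtype=theano.config.floatX),
                      borrow=True)
    # Linear output; any activation is applied by the caller
    output = T.dot(data_input, W) + b
    return [output, [W, b]]

def mse_sketch(y, y_pred):
    # Mean squared error between targets and predictions
    return T.mean(T.sqr(y - y_pred))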
Example 3
def TrainCNN():

    # Training, validation and test data
    valid_set_x, valid_set_y, valid_set = load_data.shared_dataset(
        datapar.Xval, datapar.Yval, sample_size=hyppar.Nval)
    train_set_x, train_set_y, train_set = load_data.shared_dataset(
        datapar.Xtrain, datapar.Ytrain, sample_size=hyppar.Ntrain)
    test_set_x = load_data.shared_testset(datapar.Xtest)

    # Hyperparameters
    learning_rate = hyppar.learning_rate
    num_epochs = hyppar.Nepoch
    num_filters = hyppar.Nchannel
    mini_batch_size = hyppar.mbs
    reg = hyppar.reg

    # Random subset of validation indices used to track activations
    rset = rd.sample(range(valid_set_x.get_value(borrow=True).shape[0]),
                     mini_batch_size)
    print(rset)
    # Seeding the random number generator
    rng = np.random.RandomState(23455)

    # Computing number of mini-batches
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= mini_batch_size
    n_valid_batches //= mini_batch_size
    n_test_batches //= mini_batch_size

    print('train: %d batches, validation: %d batches' %
          (n_train_batches, n_valid_batches))

    # mini-batch index
    mb_index = T.lscalar()
    # Coulomb matrices (mini_batch_size x 80 x 80)
    x = T.matrix('x')
    # Target energies (1 x mini_batch_size)
    y = T.matrix('y')

    print('***** Constructing model ***** ')

    # Reshape the mini-batch of Coulomb matrices into a
    # 4-D tensor of dimensions: mini_batch_size x 1 x 80 x 80
    layer0_input = x.reshape((mini_batch_size, 1, 80, 80))

    # Define the CNN function
    E_pred, cn_output, params = CNNStructure(layer0_input, mini_batch_size,
                                             rng)

    # Cost that is minimised during training; L2 regularisation is added below
    cost = cnn.MSE(y, E_pred)

    L2_reg = 0
    for i in range(len(params)):
        L2_reg = L2_reg + T.mean(T.sqr(params[i][0]))

    cost = cost + reg * L2_reg

    # Create a Theano function that computes the cost on the validation set.
    # This performs validation.

    # Note: the givens parameter allows us to separate the description of the
    # Theano model from the exact definition of its inputs. The 'key'
    # variable passed to the graph is substituted with the data supplied in
    # the givens parameter. Here the model is built with a regular Theano
    # tensor, and givens swaps in the slice of the (GPU-resident) dataset
    # corresponding to the requested mini-batch, which avoids copying data
    # at every call.

    # mb_index is the mini-batch index
    valid_model = theano.function(
        [mb_index],
        cost,
        givens={
            x:
            valid_set_x[mb_index * mini_batch_size:(mb_index + 1) *
                        mini_batch_size],
            y:
            valid_set_y[mb_index * mini_batch_size:(mb_index + 1) *
                        mini_batch_size]
        })

    predict = theano.function(
        [mb_index],
        E_pred,
        givens={
            x:
            valid_set_x[mb_index * mini_batch_size:(mb_index + 1) *
                        mini_batch_size]
        })

    test_model = theano.function(
        [mb_index],
        E_pred,
        givens={
            x:
            test_set_x[mb_index * mini_batch_size:(mb_index + 1) *
                       mini_batch_size]
        })

    get_activations = theano.function([],
                                      cn_output,
                                      givens={x: valid_set_x[rset]})

    # Create the Adam update rules used to train the model parameters.
    updates = cnn.gradient_updates_Adam(cost, params, learning_rate)

    # Create a Theano function to train our convolutional neural network.
    train_model = theano.function(
        [mb_index],
        cost,
        updates=updates,
        givens={
            x:
            train_set_x[mb_index * mini_batch_size:(mb_index + 1) *
                        mini_batch_size],
            y:
            train_set_y[mb_index * mini_batch_size:(mb_index + 1) *
                        mini_batch_size]
        })

    iter = 0
    epoch = 0
    cost_ij = 0
    valid_losses = [valid_model(i) for i in range(n_valid_batches)]
    valid_score = np.mean(valid_losses)

    train_error = []
    valid_error = []

    statistics.saveParameters(params)

    # This is where we call the previously defined Theano functions.
    print('***** Training model *****')
    while (epoch < num_epochs):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            # Compute number of iterations performed or total number
            # of mini-batches executed.
            iter = (epoch - 1) * n_train_batches + minibatch_index

            # Train the convolutional neural network on the mini-batch
            # selected by minibatch_index and obtain its cost.
            cost_ij = train_model(minibatch_index)

            if iter % 10 == 0:
                statistics.saveParameters(params)
            if iter % 2 == 0:
                activations = get_activations()
                statistics.saveActivations(activations)

            # Save training error
            train_error.append(float(cost_ij))

            valid_losses = [valid_model(i) for i in range(n_valid_batches)]
            # Compute the mean prediction error across all the mini-batches.
            valid_score = np.mean(valid_losses)
            # Save validation error
            valid_error.append(valid_score)

            print("Iteration: " + str(iter + 1) + "/" +
                  str(num_epochs * n_train_batches) + ", training error: " +
                  str(cost_ij) + ", validation error: " + str(valid_score))

            if (iter % 20 == 0):
                # Get predicted energies from validation set
                E = np.zeros((n_valid_batches * mini_batch_size, 1))
                step = 0
                for i in range(n_valid_batches):
                    buf = predict(i)
                    for j in range(mini_batch_size):
                        E[step, 0] = buf[j]
                        step = step + 1
                np.savetxt('output/E_pred_' + str(iter) + '.txt', E)

    # Predict energies for test set
    E_test = np.zeros((n_test_batches * mini_batch_size, 1))
    step = 0
    for i in range(n_test_batches):
        buf = test_model(i)
        for j in range(mini_batch_size):
            E_test[step, 0] = buf[j]
            step = step + 1

    statistics.writeActivations()
    # Save and write out the final parameters
    statistics.saveParameters(params)
    statistics.writeParameters()
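load_data.shared_dataset pushes the numpy arrays into Theano shared variables so that mini-batch slices can be taken via givens without recopying data. A plausible sketch, assuming it also truncates to sample_size and returns the raw arrays as a third value (the real loader may differ):

import numpy as np
import theano

def shared_dataset_sketch(data_x, data_y, sample_size=None):
    # Hypothetical sketch of the load_data.shared_dataset helper.
    # Optionally keep only the first sample_size examples.
    if sample_size is not None:
        data_x = data_x[:sample_size]
        data_y = data_y[:sample_size]
    # Store the data as float shared variables so slices stay on the device
    shared_x = theano.shared(np.asarray(data_x, dtype=theano.config.floatX),
                             borrow=True)
    shared_y = theano.shared(np.asarray(data_y, dtype=theano.config.floatX),
                             borrow=True)
    return shared_x, shared_y, (data_x, data_y)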
Example 4
def visualize_MISTtraining():
    '''
    A function to demonstrate how convolutional and fully
    connected layers are used to train a CNN to label MNIST
    digits.

    The same function is used in testing, without any output.

    Downloads the data if the MNIST pickle file is not present.

    More plotting features should be added; for now there is only
    terminal output and a final figure.

    Benchmark error on the test set with the current settings: 0.0445
    '''
    dataset = 'mnist.pkl.gz'
    data_dir, data_file = os.path.split(dataset)
    rd.seed(23455)
    # Check if data file present
    if data_dir == "" and not os.path.isfile(dataset):
        new_path = os.path.join('', dataset)
        if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
            dataset = new_path

    # Download the file from MILA if not present
    if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
        from six.moves import urllib
        origin = (
            'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz')
        print('Downloading data from %s' % origin)
        urllib.request.urlretrieve(origin, dataset)

    print('***** Loading data *****')
    # Open the file
    with gzip.open(dataset, 'rb') as f:
        try:
            # Python 3: the pickle was written under Python 2, so force latin1
            train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
        except TypeError:
            # Python 2 pickle.load has no encoding argument
            train_set, valid_set, test_set = pickle.load(f)

    train_x, train_y = train_set
    valid_x, valid_y = valid_set
    test_x, test_y = test_set

    # Load data into tensors
    train_size = 6000
    test_set_x, test_set_y_float, test_set = load_data.shared_dataset(
        test_x, test_y, sample_size=train_size // 3)
    valid_set_x, valid_set_y_float, valid_set = load_data.shared_dataset(
        valid_x, valid_y, sample_size=train_size // 3)
    train_set_x, train_set_y_float, train_set = load_data.shared_dataset(
        train_x, train_y, sample_size=train_size)

    train_set_y = T.cast(train_set_y_float, 'int32')
    valid_set_y = T.cast(valid_set_y_float, 'int32')
    test_set_y = T.cast(test_set_y_float, 'int32')

    # Training set dimension: 6000 x 784
    print('Training set: %d samples' %
          (train_set_x.get_value(borrow=True).shape[0]))
    # Test set dimension: 2000 x 784
    print('Test set: %d samples' %
          (test_set_x.get_value(borrow=True).shape[0]))
    # Validation set dimension: 2000 x 784
    print('Validation set: %d samples' %
          (valid_set_x.get_value(borrow=True).shape[0]))
    print('The training set looks like this: ')
    print(train_set[0])
    print('The labels look like this:')
    print(train_set[1])

    # set learning rate used for the Adam optimiser
    learning_rate = 0.005
    # set number of training epochs
    num_epochs = 4
    # set number of kernels for each convolution layer,
    # e.g. for 2 layers [20, 50]: layer1 = 20, layer2 = 50
    num_filters = [9]
    # set mini-batch size to be used
    mini_batch_size = 50

    # Seeding the random number generator
    rng = np.random.RandomState(23455)

    # Computing number of mini-batches
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= mini_batch_size
    n_valid_batches //= mini_batch_size
    n_test_batches //= mini_batch_size

    print('train: %d batches, test: %d batches, validation: %d batches' %
          (n_train_batches, n_test_batches, n_valid_batches))

    # mini-batch index
    mb_index = T.lscalar()
    # rasterised images
    x = T.matrix('x')
    # image labels
    y = T.ivector('y')

    print('***** Constructing model ***** ')

    # Reshape the mini-batch of rasterised images into a
    # 4-D tensor of dimensions: mini_batch_size x 1 x 28 x 28
    layer0_input = x.reshape((mini_batch_size, 1, 28, 28))

    # First convolution and pooling layer
    # 4D output tensor is of shape:
    # mini_batch_size x 9 x 12 x 12
    [layer0_output,
     layer0_params] = cnn.convLayer(rng,
                                    data_input=layer0_input,
                                    image_spec=(mini_batch_size, 1, 28, 28),
                                    filter_spec=(num_filters[0], 1, 5, 5),
                                    pool_size=(2, 2),
                                    activation=T.tanh)

    # Flatten the output into dimensions:
    # mini_batch_size x 1296
    fc_layer_input = layer0_output.flatten(2)

    # The fully connected layer operates on a matrix of
    # dimensions: mini_batch_size x 1296 and produces linear
    # class scores; softmax classification is applied below.
    [y_lin, fc_layer_params
     ] = cnn.fullyConnectedLayer(rng,
                                 data_input=fc_layer_input,
                                 num_in=num_filters[0] * 12 * 12,
                                 num_out=10)

    # The likelihood of the categories
    p_y_given_x = T.nnet.softmax(y_lin)
    # Predictions
    y_pred = T.argmax(p_y_given_x, axis=1)

    # Cost that is minimised during stochastic descent.
    cost = negative_log_lik(y=y, p_y_given_x=p_y_given_x)

    # Create a Theano function that computes the mistakes on the validation set.
    # This performs validation.

    # Note: the givens parameter allows us to separate the description of the
    # Theano model from the exact definition of its inputs. The 'key'
    # variable passed to the graph is substituted with the data supplied in
    # the givens parameter. Here the model is built with a regular Theano
    # tensor, and givens swaps in the slice of the (GPU-resident) dataset
    # corresponding to the requested mini-batch, which avoids copying data
    # at every call.

    # mb_index is the mini-batch index
    valid_model = theano.function(
        [mb_index],
        errors(y, y_pred),
        givens={
            x:
            valid_set_x[mb_index * mini_batch_size:(mb_index + 1) *
                        mini_batch_size],
            y:
            valid_set_y[mb_index * mini_batch_size:(mb_index + 1) *
                        mini_batch_size]
        })

    # Create a Theano function that computes the mistakes on the test set.
    # This evaluates the model's error rate.
    test_model = theano.function(
        [mb_index],
        errors(y, y_pred),
        givens={
            x:
            test_set_x[mb_index * mini_batch_size:(mb_index + 1) *
                       mini_batch_size],
            y:
            test_set_y[mb_index * mini_batch_size:(mb_index + 1) *
                       mini_batch_size]
        })

    # List of parameters to be fit during training
    params = fc_layer_params + layer0_params
    # Creates a list of gradients
    grads = T.grad(cost, params)

    # Create the parameter updates. The commented-out lines implement plain
    # SGD by looping over all (params[i], grads[i]) pairs; Adam is used
    # instead.
    #updates = [(param_i, param_i - learning_rate * grad_i)
    #           for param_i, grad_i in zip(params, grads)]

    updates = cnn.gradient_updates_Adam(cost, params, learning_rate)

    # Create a Theano function to train our convolutional neural network.
    train_model = theano.function(
        [mb_index],
        cost,
        updates=updates,
        givens={
            x:
            train_set_x[mb_index * mini_batch_size:(mb_index + 1) *
                        mini_batch_size],
            y:
            train_set_y[mb_index * mini_batch_size:(mb_index + 1) *
                        mini_batch_size]
        })

    # Counters and containers for bookkeeping and the plots at the end.
    iter = 0
    epoch = 0
    cost_ij = 0

    train_costs = []
    valid_accuracy = []
    # This is where we call the previously defined Theano functions.
    print('***** Training model *****')
    while (epoch < num_epochs):
        print('epoch: ' + str(epoch))
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            # Compute number of iterations performed or total number
            # of mini-batches executed.
            iter = (epoch - 1) * n_train_batches + minibatch_index

            # Train the convolutional neural network on the mini-batch
            # selected by minibatch_index and obtain its cost.
            cost_ij = train_model(minibatch_index)
            print('iter: ' + str(iter) + ', cost_ij: ' + str(cost_ij))
            train_costs.append(cost_ij)
        # Compute the prediction error on each validation mini-batch by
        # calling the previously defined Theano function.

        valid_losses = [valid_model(i) for i in range(n_valid_batches)]

        # Compute the mean prediction error across all the mini-batches.
        valid_score = np.mean(valid_losses)
        valid_accuracy.append(valid_score)

    print('***** Training Complete *****')

    test_losses = [test_model(i) for i in range(n_test_batches)]
    # Compute the mean prediction error across all the mini-batches.
    test_score = np.mean(test_losses)

    print('Error rate on the test set: ' + str(test_score))

    fig, (ax1, ax2) = plt.subplots(2)
    ax1.plot(train_costs)
    ax2.plot(valid_accuracy)
    plt.show()
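negative_log_lik and errors are the cost and accuracy helpers referred to in the MNIST examples but not reproduced in this listing. Standard Theano definitions would look roughly like the sketch below; these stand-ins are assumptions, not necessarily the project's exact code.

import theano.tensor as T

def negative_log_lik_sketch(y, p_y_given_x):
    # Hypothetical stand-in for negative_log_lik.
    # Mean negative log-probability of the correct class over the mini-batch.
    return -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y])

def errors_sketch(y, y_pred):
    # Hypothetical stand-in for errors.
    # Fraction of samples whose predicted label differs from the target.
    return T.mean(T.neq(y_pred, y))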
Example 5
def visualize_sinetraining(Npoints, Nnodes, Nsteps, learning_rate):

    pi = 3.14159265358

    xtrain = np.linspace(0, 7, Npoints)
    ytrain = np.sin(xtrain)

    # Column-vector versions for the Theano graph
    Xtrain = xtrain.reshape(Npoints, 1)
    Ytrain = np.sin(Xtrain)

    rng = np.random.RandomState(23455)

    x = T.matrix('x')
    y = T.matrix('y')

    [hout, params_1] = cnn.fullyConnectedLayer(rng=rng,
                                               data_input=x,
                                               num_in=1,
                                               num_out=Nnodes)

    [y_pred_lin, params_2] = cnn.fullyConnectedLayer(rng=rng,
                                                     data_input=T.tanh(hout),
                                                     num_in=Nnodes,
                                                     num_out=1)
    y_pred = T.tanh(y_pred_lin)
    cost = cnn.MSE(y, y_pred)

    params = params_1 + params_2

    updates = cnn.gradient_updates_Adam(cost, params, learning_rate)

    train = theano.function(inputs=[x, y],
                            outputs=[y_pred, cost, hout],
                            updates=updates)

    plt.ion()
    fig = plt.figure()
    ax1 = fig.add_subplot(311)
    ax2 = fig.add_subplot(312)
    ax3 = fig.add_subplot(313)
    ax1.plot(xtrain, ytrain, 'b-')
    errors = []
    for i in range(Nsteps):
        pred, cost_i, hout = train(Xtrain, Ytrain)
        if (i % 20 == 0):
            ax1.clear()
            line11, = ax1.plot(xtrain, ytrain, 'b--', label='Training data')
            line12, = ax1.plot(Xtrain, pred, 'r-', label='Prediction')
            ax1.legend()
            ax1.set_title('Prediction')
            errors.append(cost_i)
            ax3.clear()
            line31, = ax3.plot(errors, 'r-+', label=str(cost_i))
            ax3.set_title('Error')
            ax3.legend()
            ax2.clear()
            # hout here is the numpy array returned by train(), so apply
            # the activation with numpy rather than Theano
            houtbout = np.tanh(hout)
            ax2.plot(Xtrain, houtbout)
            ax2.set_title('Activations')
            fig.canvas.draw()
            plt.pause(0.05)
    fig.canvas.draw()
    print('Final error: ' + str(cost_i))
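A typical invocation of the visualisation above, matching the settings of the fully connected layer test in Example 2 (the argument values are illustrative only):

# Fit sin(x) on 300 points with a 4-node hidden layer and 2000 Adam steps
# (illustrative settings, not prescribed by the original code)
visualize_sinetraining(Npoints=300, Nnodes=4, Nsteps=2000, learning_rate=0.05)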
Example 6
def test_convLayer():
    '''
    Constructs a CNN with one convolutional and one fully connected layer,
    then trains the network to classify MNIST digits. The same script, with
    some output and plotting features, is found in "test.py".

    Uses the cost and accuracy functions defined above.

    Test: digit labelling accuracy > 92%

    NOTE: No validation set is used here.
    '''

    dataset = 'mnist.pkl.gz'
    data_dir, data_file = os.path.split(dataset)
    rd.seed(23455)
    # Check if data file present
    if data_dir == "" and not os.path.isfile(dataset):
        new_path = os.path.join('', dataset)
        if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
            dataset = new_path

    # Download the file from MILA if not present                                                                 
    if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
        from six.moves import urllib
        origin = (
            'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
            )
        print('Downloading data from %s' % origin)
        urllib.request.urlretrieve(origin, dataset)

    print('***** Loading data *****')
    # Open the file                                                                                              
    with gzip.open(dataset, 'rb') as f:
        try:
            # Python 3: the pickle was written under Python 2, so force latin1
            train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
        except TypeError:
            # Python 2 pickle.load has no encoding argument
            train_set, valid_set, test_set = pickle.load(f)

    train_x, train_y = train_set
    test_x, test_y = test_set

    # Load data into tensors
    train_size = 6000
    test_set_x, test_set_y_float, test_set = load_data.shared_dataset(
        test_x, test_y, sample_size=train_size // 3)
    train_set_x, train_set_y_float, train_set = load_data.shared_dataset(
        train_x, train_y, sample_size=train_size)

    train_set_y = T.cast(train_set_y_float, 'int32')
    test_set_y = T.cast(test_set_y_float, 'int32')

    # set learning rate used for the Adam optimiser
    learning_rate = 0.005
    # set number of training epochs
    num_epochs = 4
    # set number of kernels for each convolution layer,
    # e.g. for 2 layers [20, 50]: layer1 = 20, layer2 = 50
    num_filters = [9]
    # set mini-batch size to be used                                                                             
    mini_batch_size = 50


    # Seeding the random number generator                                                                        
    rng = np.random.RandomState(23455)

    # Computing number of mini-batches                                                                           
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= mini_batch_size
    n_test_batches //= mini_batch_size

    # mini-batch index                                                                                           
    mb_index = T.lscalar()
    # rasterised images                                                                                          
    x = T.matrix('x')
    # image labels                                                                                               
    y = T.ivector('y')

    # Reshape the mini-batch of rasterised images into a
    # 4-D tensor of dimensions: mini_batch_size x 1 x 28 x 28
    layer0_input = x.reshape((mini_batch_size, 1, 28, 28))

    # First convolution and pooling layer                                                                        
    # 4D output tensor is of shape:                                                                              
    # mini_batch_size x 9 x 12 x 12                                                                              
    [layer0_output, layer0_params] = cnn.convLayer(
        rng,
        data_input=layer0_input,
        image_spec=(mini_batch_size, 1, 28, 28),
        filter_spec=(num_filters[0], 1, 5, 5),
        pool_size=(2, 2),
        activation=T.tanh)
    
    # Flatten the output into dimensions:
    # mini_batch_size x 1296                                                                                     
    fc_layer_input = layer0_output.flatten(2)

    # The fully connected layer operates on a matrix of
    # dimensions: mini_batch_size x 1296 and produces linear
    # class scores; softmax classification is applied below.
    [y_lin, fc_layer_params] = cnn.fullyConnectedLayer(
        rng,
        data_input=fc_layer_input,
        num_in=num_filters[0]*12*12,
        num_out=10)

    # The likelihood of the categories                                                                           
    p_y_given_x = T.nnet.softmax(y_lin)
    # Predictions                                                                                                
    y_pred = T.argmax(p_y_given_x, axis=1)

    # Cost that is minimised during stochastic descent.                                                          
    cost = negative_log_lik(y=y, p_y_given_x=p_y_given_x)

    # Create a Theano function that computes the mistakes on the test set.
    # This evaluates the model's error rate.
    test_model = theano.function(
        [mb_index],
        errors(y, y_pred),
        givens={
            x: test_set_x[
                mb_index * mini_batch_size:
                    (mb_index + 1) * mini_batch_size
                ],
            y: test_set_y[
                mb_index * mini_batch_size:
                    (mb_index + 1) * mini_batch_size
                ]})

    # List of parameters to be fit during training                                                               
    params = fc_layer_params + layer0_params

    updates = cnn.gradient_updates_Adam(cost, params, learning_rate)

    # Create a Theano function to train our convolutional neural network.                                        
    train_model = theano.function(
        [mb_index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[
                mb_index * mini_batch_size:
                    (mb_index + 1) * mini_batch_size
                ],
            y: train_set_y[
                mb_index * mini_batch_size:
                    (mb_index + 1) * mini_batch_size
                ]})

    iter = 0
    epoch = 0
    cost_ij = 0

    # This is where we call the previously defined Theano functions.                                             
    while (epoch < num_epochs):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            cost_ij = train_model(minibatch_index)

    test_losses = [test_model(i) for i in range(n_test_batches)]
    test_score = np.mean(test_losses)
    assert test_score < 0.08
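cnn.convLayer is used above with image_spec, filter_spec, pool_size and activation arguments but is not shown. A minimal sketch of such a convolution-plus-pooling layer in Theano follows; the weight initialisation is an assumption, and the pool_2d keyword names vary between Theano versions (ws in recent releases, ds in older ones).

import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import conv2d
from theano.tensor.signal.pool import pool_2d

def conv_layer_sketch(rng, data_input, image_spec, filter_spec,
                      pool_size, activation):
    # Hypothetical sketch; the project's cnn.convLayer may differ.
    # filter_spec = (num_filters, num_input_channels, height, width)
    fan_in = np.prod(filter_spec[1:])
    W_values = np.asarray(rng.uniform(low=-1.0 / np.sqrt(fan_in),
                                      high=1.0 / np.sqrt(fan_in),
                                      size=filter_spec),
                          dtype=theano.config.floatX)
    W = theano.shared(W_values, borrow=True)
    b = theano.shared(np.zeros((filter_spec[0],), dtype=theano.config.floatX),
                      borrow=True)
    # Valid convolution: a 28x28 input with a 5x5 filter gives 24x24 maps
    conv_out = conv2d(input=data_input, filters=W,
                      filter_shape=filter_spec, input_shape=image_spec)
    # 2x2 max-pooling halves each spatial dimension (24x24 -> 12x12)
    pooled = pool_2d(input=conv_out, ws=pool_size, ignore_border=True)
    # Add the per-filter bias and apply the activation
    output = activation(pooled + b.dimshuffle('x', 0, 'x', 'x'))
    return [output, [W, b]]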