Example #1

import os
import sys
import timeit

import numpy as np
import theano
import theano.tensor as T

# `MLP` and `load_data` are assumed to come from the Theano deep learning
# tutorial's mlp.py and logistic_sgd.py; they are not defined in this snippet.

def test_mlp(learning_rate=0.01,
             L1_reg=0.00,
             L2_reg=0.0001,
             n_epochs=1000,
             dataset='../data/mnist/mnist.pkl.gz',
             batch_size=20,
             n_hidden=500):
    """
    Demonstrate stochastic gradient descent optimization for a multilayer
    perceptron

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    :type batch_size: int
    :param batch_size: number of examples per minibatch

    :type n_hidden: int
    :param n_hidden: number of hidden units
    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as a 1D vector of
                        # [int] labels

    rng = np.random.RandomState(1234)

    # construct the MLP class
    classifier = MLP(rng=rng,
                     input=x,
                     n_in=28 * 28,
                     n_hidden=n_hidden,
                     n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 +
            L2_reg * classifier.L2_sqr)

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in the list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of the same size, where
    # each element is a pair formed from the two lists:
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost and,
    # at the same time, updates the parameters of the model based on the
    # rules defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = np.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(
        ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' %
         ((end_time - start_time) / 60.)),
        file=sys.stderr)
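
A minimal entry point to run this example (a sketch; it assumes the default
dataset path above exists):

if __name__ == '__main__':
    test_mlp()
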
Example #2

import numpy as np
from pandas import read_csv

# `MLP`, `tanh`, `identity`, `d_tanh`, and `d_identity` are assumed to come
# from the example's own neural-network module; they are not defined here.

if __name__ == '__main__':

    df = read_csv('./../data/africa-soil/training.csv')
    # DataFrame.as_matrix was removed in pandas 1.0; iloc + to_numpy is the
    # modern equivalent of the original as_matrix(columns=...) calls
    x = df.iloc[:, 1:3595].to_numpy()
    x[:, -1] = (x[:, -1] == 'Topsoil') * 1.0
    x = x.astype(float)
    y = df.iloc[:, 3595:].to_numpy()
    y = y.astype(float)

    # sample the training indices without replacement so the train/CV split
    # is disjoint and the training set contains no duplicates
    idx_train = list(
        np.random.choice(x.shape[0], size=int(round(0.8 * x.shape[0])),
                         replace=False))
    idx_cv = list(set(range(x.shape[0])) - set(idx_train))

    nn = MLP(3594, (50, 5),
             activation_functions=[tanh, identity],
             rng=(lambda n: np.random.normal(0, 0.01, n)))
    train_cost, cv_cost = \
        nn.train_backprop(x[idx_train, :], y[idx_train, :],
                          d_f_list=[d_tanh, d_identity],
                          batch_size=None,
                          max_iter=1000,
                          learning_rate=0.001,
                          momentum_rate=0.9,
                          neural_local_gain=(0.0005, 0.9995, 0.001, 1000),
                          stop_threshold=0.05,
                          cv_input_data=x[idx_cv, :],
                          cv_output_data=y[idx_cv, :],
                          #regularization_rate=0.1,
                          #regularization_norm=l2,
                          #d_regularization_norm=d_l2
                          )
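
The activation helpers passed to MLP and train_backprop are not shown in the
snippet. A minimal sketch of what tanh/identity and their derivatives
presumably look like (assuming d_f_list expects elementwise derivatives
evaluated at the same inputs as the activations):

import numpy as np

def tanh(x):
    return np.tanh(x)

def d_tanh(x):
    # elementwise derivative: 1 - tanh(x)^2
    return 1.0 - np.tanh(x) ** 2

def identity(x):
    return x

def d_identity(x):
    # the identity's derivative is 1 everywhere
    return np.ones_like(x)
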
Example #3

# Truncated snippet: an enclosing loop over filenames `f` is assumed, with
# `parts = re.split(r'[-.]', f)` and `v` the flattened input vector, as in
# Example #4 below.
        i = int(parts[1])
        o = np.zeros(26)
        o[i] = 1.0
        if len(parts) == 5 and parts[2] in ['7', '8']:
            cv_input = np.vstack([cv_input, v])
            cv_output = np.vstack([cv_output, o])
        elif len(parts) == 5 and parts[2] in ['5', '6']:
            test_input = np.vstack([test_input, v])
            test_output = np.vstack([test_output, o])
        else:
            train_input = np.vstack([train_input, v])
            train_output = np.vstack([train_output, o])


    nn = MLP(841, (100, 26),
             activation_functions=[tanh, softmax],
             rng=(lambda n: np.random.normal(0, 0.01, n)))
    train_cost, cv_cost = \
        nn.train_backprop(train_input, train_output,
                          d_f_list=[d_tanh, d_softmax],
                          goal=cross_entropy,
                          d_goal=d_cross_entropy,
                          batch_size=1,
                          max_iter=100,
                          learning_rate=0.01,
                          momentum_rate=0.9,
                          #neural_local_gain=(0.0005, 0.9995, 0.001, 1000),
                          stop_threshold=0.05,
                          cv_input_data=cv_input,
                          cv_output_data=cv_output,
                          #regularization_rate=0.1,
                          )
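
The goal/d_goal pair here is the usual softmax-plus-cross-entropy setup. A
minimal sketch of the presumed definitions (the argument order, the eps
clipping, and the mean reduction are assumptions, not the example's actual
code):

import numpy as np

def softmax(x):
    # subtract the row-wise max for numerical stability
    e = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e / np.sum(e, axis=-1, keepdims=True)

def cross_entropy(y, t):
    # mean negative log-likelihood of targets t under predictions y
    eps = 1e-12
    return -np.mean(np.sum(t * np.log(y + eps), axis=-1))

def d_cross_entropy(y, t):
    # elementwise gradient of the cross-entropy w.r.t. the predictions
    eps = 1e-12
    return -t / (y + eps)
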
Example #4

# Same truncated context as Example #3: an enclosing loop over filenames `f`
# producing the flattened input vector `v` is assumed.
        parts = re.split(r'[-.]', f)
        i = int(parts[1])
        o = np.zeros(26)
        o[i] = 1.0
        if len(parts) == 5 and parts[2] in ['7', '8']:
            cv_input = np.vstack([cv_input, v])
            cv_output = np.vstack([cv_output, o])
        elif len(parts) == 5 and parts[2] in ['5', '6']:
            test_input = np.vstack([test_input, v])
            test_output = np.vstack([test_output, o])
        else:
            train_input = np.vstack([train_input, v])
            train_output = np.vstack([train_output, o])

    nn = MLP(841, (100, 26),
             activation_functions=[sigmoid, sigmoid],
             rng=(lambda n: np.random.normal(0, 0.01, n)))
    train_cost, cv_cost = \
        nn.train_backprop(train_input, train_output,
                          d_f_list=[d_sigmoid, d_sigmoid],
                          goal=log_Bernoulli_likelihood,
                          d_goal=d_log_Bernoulli_likelihood,
                          batch_size=None,
                          max_iter=2500,
                          learning_rate=0.1,
                          momentum_rate=0.9,
                          neural_local_gain=(0.005, 0.995, 0.001, 1000),
                          stop_threshold=0.05,
                          cv_input_data=cv_input,
                          cv_output_data=cv_output,
                          #regularization_rate=0.1,
                          )
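
This variant trains sigmoid output units against a Bernoulli log-likelihood
cost. A minimal sketch of the presumed helpers (argument order and sign
conventions are assumptions):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def d_sigmoid(x):
    s = sigmoid(x)
    return s * (1.0 - s)

def log_Bernoulli_likelihood(y, t):
    # negative Bernoulli log-likelihood (binary cross-entropy), averaged
    eps = 1e-12
    return -np.mean(t * np.log(y + eps) + (1 - t) * np.log(1 - y + eps))

def d_log_Bernoulli_likelihood(y, t):
    # elementwise gradient w.r.t. the predictions y
    eps = 1e-12
    return -t / (y + eps) + (1 - t) / (1 - y + eps)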