def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             dataset='../data/mnist/mnist.pkl.gz', batch_size=20, n_hidden=500):
    """
    Demonstrate stochastic gradient descent optimization for a multilayer
    perceptron.

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                    http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    :type batch_size: int
    :param batch_size: number of examples per SGD minibatch

    :type n_hidden: int
    :param n_hidden: number of units in the hidden layer
    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of
                         # [int] labels

    rng = np.random.RandomState(1234)

    # construct the MLP class
    classifier = MLP(rng=rng, input=x, n_in=28 * 28,
                     n_hidden=n_hidden, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of the same size, where
    # each element is a pair formed from the two lists:
    # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost and, at
    # the same time, updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 10000        # look at this many examples regardless
    patience_increase = 2   # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                            # go through this many minibatches before checking
                            # the network on the validation set; in this case
                            # we check every epoch

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = np.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' + os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)),
          file=sys.stderr)
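# Worked example of the early-stopping schedule above (a sketch, assuming the
# standard mnist.pkl.gz split with 50,000 training examples and the default
# batch_size of 20):
#   n_train_batches      = 50000 // 20 = 2500 minibatches per epoch
#   validation_frequency = min(2500, 10000 // 2) = 2500
#                          -> validation error is measured once per epoch
#   patience             = 10000 iterations = 4 epochs before training can
#                          stop; each sufficiently large improvement raises
#                          patience to max(patience, iter * patience_increase),
#                          so the loop keeps running while validation error
#                          keeps dropping.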
if __name__ == '__main__':
    # Africa Soil Property Prediction data: the first column is skipped,
    # columns 1..3594 are the input features (the last of them is the
    # Topsoil/Subsoil flag) and the remaining columns are the regression
    # targets.
    df = read_csv('./../data/africa-soil/training.csv')

    x = df.as_matrix(columns=df.columns[1:3595])
    x[:, -1] = (x[:, -1] == 'Topsoil') * 1.0   # encode the Topsoil flag as 0/1
    x = x.astype(float)

    y = df.as_matrix(columns=df.columns[3595:])
    y = y.astype(float)

    # split the rows into a training set (~80%) and a cross-validation set
    idx_train = list(
        np.random.choice(range(x.shape[0]),
                         size=int(round(0.8 * x.shape[0]))))
    idx_cv = list(set(range(x.shape[0])) - set(idx_train))

    # regression network: 3594 inputs, 50 tanh hidden units, 5 linear outputs
    nn = MLP(3594, (50, 5),
             activation_functions=[tanh, identity],
             rng=(lambda n: np.random.normal(0, 0.01, n)))

    train_cost, cv_cost = \
        nn.train_backprop(x[idx_train, :], y[idx_train, :],
                          d_f_list=[d_tanh, d_identity],
                          batch_size=None,
                          max_iter=1000,
                          learning_rate=0.001,
                          momentum_rate=0.9,
                          neural_local_gain=(0.0005, 0.9995, 0.001, 1000),
                          stop_threshold=0.05,
                          cv_input_data=x[idx_cv, :],
                          cv_output_data=y[idx_cv, :],
                          #regularization_rate=0.1,
                          #regularization_norm=l2,
                          #d_regularization_norm=d_l2
    # excerpt from the per-file loop (the loop header is not shown here);
    # `parts` presumably comes from splitting the file name and `v` is the
    # flattened 29x29 letter image
    i = int(parts[1])
    o = np.zeros(26)      # one-hot target over the 26 letters
    o[i] = 1.0

    if len(parts) == 5 and parts[2] in ['7', '8']:
        cv_input = np.vstack([cv_input, v])
        cv_output = np.vstack([cv_output, o])
    elif len(parts) == 5 and parts[2] in ['5', '6']:
        test_input = np.vstack([test_input, v])
        test_output = np.vstack([test_output, o])
    else:
        train_input = np.vstack([train_input, v])
        train_output = np.vstack([train_output, o])

# classification network: 841 inputs, 100 tanh hidden units, softmax output
nn = MLP(841, (100, 26),
         activation_functions=[tanh, softmax],
         rng=(lambda n: np.random.normal(0, 0.01, n)))

train_cost, cv_cost = \
    nn.train_backprop(train_input, train_output,
                      d_f_list=[d_tanh, d_softmax],
                      goal=cross_entropy,
                      d_goal=d_cross_entropy,
                      batch_size=1,
                      max_iter=100,
                      learning_rate=0.01,
                      momentum_rate=0.9,
                      #neural_local_gain=(0.0005, 0.9995, 0.001, 1000),
                      stop_threshold=0.05,
                      cv_input_data=cv_input,
                      cv_output_data=cv_output,
                      #regularization_rate=0.1,
    # excerpt from the per-file loop (the loop header is not shown here);
    # `f` is the file name and `v` is the flattened 29x29 letter image
    parts = re.split(r'[-\.]', f)
    i = int(parts[1])
    o = np.zeros(26)      # one-hot target over the 26 letters
    o[i] = 1.0

    if len(parts) == 5 and parts[2] in ['7', '8']:
        cv_input = np.vstack([cv_input, v])
        cv_output = np.vstack([cv_output, o])
    elif len(parts) == 5 and parts[2] in ['5', '6']:
        test_input = np.vstack([test_input, v])
        test_output = np.vstack([test_output, o])
    else:
        train_input = np.vstack([train_input, v])
        train_output = np.vstack([train_output, o])

# classification network: 841 inputs, 100 sigmoid hidden units, sigmoid output
nn = MLP(841, (100, 26),
         activation_functions=[sigmoid, sigmoid],
         rng=(lambda n: np.random.normal(0, 0.01, n)))

train_cost, cv_cost = \
    nn.train_backprop(train_input, train_output,
                      d_f_list=[d_sigmoid, d_sigmoid],
                      goal=log_Bernoulli_likelihood,
                      d_goal=d_log_Bernoulli_likelihood,
                      batch_size=None,
                      max_iter=2500,
                      learning_rate=0.1,
                      momentum_rate=0.9,
                      neural_local_gain=(0.005, 0.995, 0.001, 1000),
                      stop_threshold=0.05,
                      cv_input_data=cv_input,
                      cv_output_data=cv_output,
                      #regularization_rate=0.1,