def test_on_parameters(n_hiddenLayers, n_epochs, showCost):
    # Split data into train and test sections
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=test_ratio, random_state=42)
    print(X.shape, X_train.shape, X_test.shape)

    # Train the network on train data
    params = nn.train_nn(X_train, Y_train, n_hiddenLayers, n_epochs,
                         adaptive_lRate, showCost=showCost)

    # Test neural network on test data
    A2, cache = nn.f_propagate(X_test, params)
    acc = "{:.4f}".format(nn.accuracy(A2, Y_test))
    print('Accuracy on test data: ' + acc + '%')

    # Test neural network on train data
    A2, cache = nn.f_propagate(X_train, params)
    acc = "{:.4f}".format(nn.accuracy(A2, Y_train))
    print('Accuracy on train data: ' + acc + '%')

    # Test neural network on all data
    A2, cache = nn.f_propagate(X, params)
    acc = "{:.4f}".format(nn.accuracy(A2, Y))
    print('Accuracy on all available data: ' + acc + '%')
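# --- Illustrative example (not part of the original module) ---
# A minimal, self-contained sketch of the scikit-learn `train_test_split` call
# used above. The toy arrays below are stand-ins for the module-level X, Y and
# test_ratio, which are defined elsewhere in the project.
import numpy as np
from sklearn.model_selection import train_test_split

X_demo = np.arange(20).reshape(10, 2)   # 10 samples, 2 features
Y_demo = np.arange(10) % 2              # binary labels
test_ratio_demo = 0.2

# random_state fixes the shuffle so the split is reproducible.
X_tr, X_te, Y_tr, Y_te = train_test_split(
    X_demo, Y_demo, test_size=test_ratio_demo, random_state=42)
print(X_demo.shape, X_tr.shape, X_te.shape)  # (10, 2) (8, 2) (2, 2)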
def train():
    print("start training")
    mcts.MCTS.get_tree_and_edges(reset=True)
    neural_network.nn_predictor.reset_nn_check_pts()
    nn_training_set = None
    iterations = 50
    for iteration in range(iterations):
        print('iteration:', iteration)
        # Self-play with the current best network
        player1 = player.Zero_Player('x', 'Bot_ONE', nn_type='best', temperature=1)
        player2 = player.Zero_Player('x', 'Bot_ONE', nn_type='best', temperature=1)
        self_play_game = game.Game(player1, player2)
        self_play_results = self_play_game.play(500)
        augmented_self_play_results = neural_network.augment_data_set(
            self_play_results)
        mcts.MCTS.update_mcts_edges(augmented_self_play_results)
        nn_training_set = neural_network.update_nn_training_set(
            self_play_results, nn_training_set)
        neural_network.train_nn(nn_training_set)

        # Evaluate the newly trained network against the current best
        player1 = player.Zero_Player('x', 'Bot_ONE', nn_type='last', temperature=0)
        player2 = player.Zero_Player('x', 'Bot_ONE', nn_type='best', temperature=0)
        nn_test_game = game.Game(player1, player2)
        wins_player1, wins_player2 = nn_test_game.play_symmetric(100)
        if wins_player1 >= wins_player2:
            neural_network.nn_predictor.BEST = neural_network.nn_predictor.LAST
def test_inc_and_dec():
    plotX = []
    plotY = []
    plotZ = []
    size = 10
    incArr = np.linspace(1, 1.1, size)
    decArr = np.linspace(0.5, 1, size)
    for inc in range(0, size):
        for dec in range(0, size):
            acc_arr = []
            adaptive_lRate = {
                'InitialRate': 0.01,
                'DecrementVar': decArr[dec],
                'IncrementVar': incArr[inc],
                'ErrorRatio': 1.04
            }
            for i in range(1, 4):
                X_train, X_test, Y_train, Y_test = train_test_split(
                    X, Y, test_size=test_ratio, random_state=randint(1, 100))
                params = nn.train_nn(X_train, Y_train, 8, 500, adaptive_lRate,
                                     showCost=False)
                pred_test, pred_cache = nn.f_propagate(X_test, params)
                acc_test = nn.accuracy(pred_test, Y_test)
                pred_train, train_cache = nn.f_propagate(X_train, params)
                acc_train = nn.accuracy(pred_train, Y_train)
                pred_all, all_cache = nn.f_propagate(X, params)
                acc_all = nn.accuracy(pred_all, Y)
                acc_arr.append((acc_test + acc_train + acc_all) / 3)
            plotX.append(incArr[inc])
            plotY.append(decArr[dec])
            plotZ.append(np.mean(acc_arr))
            # plotZ.append(acc_test)
            print('Inc: %f - Dec: %f - Average Acc: %f' %
                  (incArr[inc], decArr[dec], np.mean(acc_arr)))
    fig = plt.figure()
    ax = Axes3D(fig)
    ax.set_xlabel('Increment')
    ax.set_ylabel('Decrement')
    ax.set_zlabel('Accuracy')
    surf = ax.plot_trisurf(plotX, plotY, plotZ, linewidth=0.1, cmap='summer')
    plt.savefig('./plots/plot.png')
    plt.show()
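# --- Illustrative example (not part of the original module) ---
# A minimal sketch of the "bold driver" style rule that the adaptive_lRate
# dictionary above appears to parameterize (increment/decrement factors and an
# error-ratio threshold). This is an assumption for illustration only:
# nn.train_nn is not shown here, so its exact update rule may differ.
def adapt_learning_rate(lr, prev_cost, new_cost, settings):
    if new_cost > prev_cost * settings['ErrorRatio']:
        # Cost grew by more than the allowed ratio: shrink the learning rate.
        return lr * settings['DecrementVar']
    if new_cost < prev_cost:
        # Cost decreased: grow the learning rate slightly.
        return lr * settings['IncrementVar']
    return lr

settings = {'InitialRate': 0.01, 'DecrementVar': 0.7,
            'IncrementVar': 1.05, 'ErrorRatio': 1.04}
lr = settings['InitialRate']
for prev_cost, new_cost in [(1.0, 0.9), (0.9, 0.95), (0.95, 1.2)]:
    lr = adapt_learning_rate(lr, prev_cost, new_cost, settings)
    print('lr =', lr)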
def test_range__hidden_and_epochs(hidden_start, hidden_end, hidden_step,
                                  epochs_start, epochs_end, epochs_step,
                                  iterate_n):
    plotX = []
    plotY = []
    plotZ = []
    for hidden in range(hidden_start, hidden_end + 1, hidden_step):
        for epoch in range(epochs_start, epochs_end + 1, epochs_step):
            acc_arr = []
            for i in range(1, iterate_n + 1):
                X_train, X_test, Y_train, Y_test = train_test_split(
                    X, Y, test_size=test_ratio, random_state=randint(1, 100))
                params = nn.train_nn(X_train, Y_train, hidden, epoch,
                                     const_lRate, showCost=False)
                pred_test, pred_cache = nn.f_propagate(X_test, params)
                acc_test = nn.accuracy(pred_test, Y_test)
                pred_train, train_cache = nn.f_propagate(X_train, params)
                acc_train = nn.accuracy(pred_train, Y_train)
                pred_all, all_cache = nn.f_propagate(X, params)
                acc_all = nn.accuracy(pred_all, Y)
                acc_arr.append((acc_test + acc_train + acc_all) / 3)
            plotX.append(hidden)
            plotY.append(epoch)
            plotZ.append(np.mean(acc_arr))
            # plotZ.append(acc_test)
            print('Hidden: %i - Epoch: %i - Average Acc: %f' %
                  (hidden, epoch, np.mean(acc_arr)))
    fig = plt.figure()
    ax = Axes3D(fig)
    ax.set_xlabel('Neurons in hidden layer')
    ax.set_ylabel('Epochs')
    ax.set_zlabel('Accuracy')
    surf = ax.plot_trisurf(plotX, plotY, plotZ, linewidth=0.1, cmap='winter')
    plt.savefig('./plots/plot.png')
    plt.show()
def test_adaptive_lRate():
    rateAcc = []
    singleAcc = []
    ratesArr = np.linspace(0.001, 0.2, 50)
    for i in range(0, 50):
        for j in range(1, 3):
            adaptive_lRate = {
                'InitialRate': ratesArr[i],
                'DecrementVar': 0.7,
                'IncrementVar': 1.05,
                'ErrorRatio': 1.04
            }
            X_train, X_test, Y_train, Y_test = train_test_split(
                X, Y, test_size=test_ratio, random_state=i * j)
            params = nn.train_nn(X_train, Y_train, 8, 500, adaptive_lRate,
                                 showCost=False)
            pred_test, pred_cache = nn.f_propagate(X_test, params)
            acc_test = nn.accuracy(pred_test, Y_test)
            pred_train, train_cache = nn.f_propagate(X_train, params)
            acc_train = nn.accuracy(pred_train, Y_train)
            pred_all, all_cache = nn.f_propagate(X, params)
            acc_all = nn.accuracy(pred_all, Y)
            singleAcc.append(np.mean([acc_test, acc_train, acc_all]))
        rateAcc.append(np.mean(singleAcc))
        print('Initial rate: %f, Acc: %f' % (ratesArr[i], np.mean(singleAcc)))
        singleAcc = []
    plt.plot(ratesArr, rateAcc)
    plt.xlabel('Initial Rate')
    plt.ylabel('Accuracy')
    plt.savefig('./plots/plot.png')
    plt.show()
def test_params(n_hidden, n_epochs, n_iterations):
    acc_arr = []
    lowest_test = lowest_train = lowest_all = 100
    incidents = 0
    for i in range(1, n_iterations + 1):
        X_train, X_test, Y_train, Y_test = train_test_split(
            X, Y, test_size=test_ratio, random_state=i)
        params = nn.train_nn(X_train, Y_train, n_hidden, n_epochs, const_lRate,
                             showCost=False)
        pred_test, pred_cache = nn.f_propagate(X_test, params)
        acc_test = nn.accuracy(pred_test, Y_test)
        pred_train, train_cache = nn.f_propagate(X_train, params)
        acc_train = nn.accuracy(pred_train, Y_train)
        pred_all, all_cache = nn.f_propagate(X, params)
        acc_all = nn.accuracy(pred_all, Y)
        if acc_test < lowest_test:
            lowest_test = acc_test
        if acc_train < lowest_train:
            lowest_train = acc_train
        if acc_all < lowest_all:
            lowest_all = acc_all
        if acc_all < 90 or acc_train < 90 or acc_test < 90:
            incidents += 1
        acc_arr.append((acc_test + acc_train + acc_all) / 3)
        print('Iteration: %i, Test: %f, Train: %f, All: %f' %
              (i, acc_test, acc_train, acc_all))
    print('================================================')
    print('Average accuracy for all iterations: %f, Number of incidents (<90 acc): %i'
          % (np.mean(acc_arr), incidents))
    print('Lowest accuracies >>> Test: %f, Train: %f, All: %f' %
          (lowest_test, lowest_train, lowest_all))
def test_const_lRate(l_start, l_end):
    rateAcc = []
    singleAcc = []
    ratesArr = np.linspace(l_start, l_end, 100)
    for i in range(0, 100):
        for j in range(1, 5):
            X_train, X_test, Y_train, Y_test = train_test_split(
                X, Y, test_size=test_ratio, random_state=i * j)
            params = nn.train_nn(X_train, Y_train, 8, 500, ratesArr[i],
                                 showCost=False)
            pred_test, pred_cache = nn.f_propagate(X_test, params)
            acc_test = nn.accuracy(pred_test, Y_test)
            pred_train, train_cache = nn.f_propagate(X_train, params)
            acc_train = nn.accuracy(pred_train, Y_train)
            pred_all, all_cache = nn.f_propagate(X, params)
            acc_all = nn.accuracy(pred_all, Y)
            singleAcc.append(np.mean([acc_test, acc_train, acc_all]))
        rateAcc.append(np.mean(singleAcc))
        print('Learning_rate: %f, Acc: %f' % (ratesArr[i], np.mean(singleAcc)))
        singleAcc = []
    plt.plot(ratesArr, rateAcc)
    plt.xlabel('Learning rate')
    plt.ylabel('Accuracy')
    plt.savefig('./plots/plot.png')
    plt.show()
def compare_rates():
    plotX = []
    plotConst = []
    plotAdapt = []
    for epoch in range(0, 400, 10):
        # Collect accuracies over the repeated splits before averaging
        singleConst = []
        singleAdapt = []
        for j in range(1, 5):
            X_train, X_test, Y_train, Y_test = train_test_split(
                X, Y, test_size=test_ratio, random_state=randint(1, 100))
            params_const = nn.train_nn(X_train, Y_train, 8, epoch, const_lRate,
                                       showCost=False)
            params_adapt = nn.train_nn(X_train, Y_train, 8, epoch,
                                       adaptive_lRate, showCost=False)

            pred_test_const, pred_cache = nn.f_propagate(X_test, params_const)
            acc_test_const = nn.accuracy(pred_test_const, Y_test)
            pred_train_const, train_cache = nn.f_propagate(X_train, params_const)
            acc_train_const = nn.accuracy(pred_train_const, Y_train)
            pred_all_const, all_cache = nn.f_propagate(X, params_const)
            acc_all_const = nn.accuracy(pred_all_const, Y)

            pred_test_adapt, pred_cache = nn.f_propagate(X_test, params_adapt)
            acc_test_adapt = nn.accuracy(pred_test_adapt, Y_test)
            pred_train_adapt, train_cache = nn.f_propagate(X_train, params_adapt)
            acc_train_adapt = nn.accuracy(pred_train_adapt, Y_train)
            pred_all_adapt, all_cache = nn.f_propagate(X, params_adapt)
            acc_all_adapt = nn.accuracy(pred_all_adapt, Y)

            singleConst.append(
                np.mean([acc_test_const, acc_train_const, acc_all_const]))
            singleAdapt.append(
                np.mean([acc_test_adapt, acc_train_adapt, acc_all_adapt]))
        plotX.append(epoch)
        plotConst.append(np.mean(singleConst))
        plotAdapt.append(np.mean(singleAdapt))
        print('Epoch: %i, Const acc: %f, Adaptive acc: %f' %
              (epoch, np.mean(singleConst), np.mean(singleAdapt)))
    plt.plot(plotX, plotConst, label='Constant Learning Rate')
    plt.plot(plotX, plotAdapt, label='Adaptive Learning Rate')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc='lower left',
               ncol=2, mode="expand", borderaxespad=0.)
    plt.savefig('./plots/plot.png')
    plt.show()
def test_mnist(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=100,
               batch_size=128, n_hidden=500, n_hiddenLayers=3,
               normalization=True, eps=1e-4, verbose=False, smaller_set=True,
               loss='norm', lr_decay=False, binary=True):
    """
    Wrapper function for training and testing MLP

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
    number of units in each hidden layer, and its length should equal
    n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: whether to print an epoch summary.

    :type smaller_set: boolean
    :param smaller_set: whether to use the smaller dataset.

    :type loss: string
    :param loss: 'norm' for negative log-likelihood loss; any other value
    selects the hinge loss.

    :type lr_decay: boolean
    :param lr_decay: whether to apply learning-rate decay.

    :type binary: boolean
    :param binary: whether to binarize the weights.

    :type normalization: boolean
    :param normalization: whether to apply normalization.

    :type eps: float
    :param eps: epsilon used by the normalization.
    """
    # load the dataset; download the dataset if it is not present
    datasets = load_data_mnist(theano_shared=True)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    train_data_y_mat = datasets[3]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels
    y_mat = T.matrix('y_mat')
    epoch = T.lscalar('epoch')

    rng = numpy.random.RandomState(1234)

    # construct the MLP
    classifier = myMLP(rng=rng, input=x, n_in=28 * 28, n_hidden=n_hidden,
                       n_hiddenLayers=n_hiddenLayers, n_out=10, binary=binary,
                       normalization=normalization, eps=eps)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically. The loss can be chosen as NLL loss or hinge loss.
    if loss == 'norm':
        cost = (classifier.negative_log_likelihood(y) +
                L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr)
    else:
        cost = classifier.logRegressionLayer.hinge(y_mat)

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
        })

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in a list gparams.
    # Following the paper, the gradient is calculated with respect to the
    # binarized weights, since those are the weights used during forward
    # propagation.
    if binary:
        gparams = [T.grad(cost, param) for param in classifier.params_bin]
    else:
        gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    # When lr_decay is set, the learning rate follows an exponential decay:
    # lr = lr_start * (lr_final / lr_start) ** (epoch / decay_horizon),
    # with the decay horizon fixed at 25 epochs below.
    if lr_decay:
        lr_start = learning_rate
        lr_final = 1e-6
        updates = [
            (param_i,
             T.cast(
                 T.clip(
                     param_i -
                     (lr_start * (lr_final / lr_start)**(epoch / 25)) * grad_i,
                     -1, 1), theano.config.floatX))
            for param_i, grad_i in zip(classifier.params, gparams)
        ]
    else:
        updates = [(param_i,
                    T.cast(T.clip(param_i - learning_rate * grad_i, -1, 1),
                           theano.config.floatX))
                   for param_i, grad_i in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost and at
    # the same time updates the parameters of the model based on the rules
    # defined in `updates`.
    # Depending on lr_decay and the chosen loss, different inputs and givens
    # are passed to the training function.
    if loss == 'norm':
        if lr_decay:
            train_model = theano.function(
                inputs=[index, epoch],
                outputs=cost,
                updates=updates,
                givens={
                    x: train_set_x[index * batch_size:(index + 1) * batch_size],
                    y: train_set_y[index * batch_size:(index + 1) * batch_size],
                })
        else:
            train_model = theano.function(
                inputs=[index],
                outputs=cost,
                updates=updates,
                givens={
                    x: train_set_x[index * batch_size:(index + 1) * batch_size],
                    y: train_set_y[index * batch_size:(index + 1) * batch_size],
                })
    else:
        # the hinge loss expects the labels in matrix (one-hot) form
        if lr_decay:
            train_model = theano.function(
                inputs=[index, epoch],
                outputs=cost,
                updates=updates,
                givens={
                    x: train_set_x[index * batch_size:(index + 1) * batch_size],
                    y_mat: train_data_y_mat[index * batch_size:(index + 1) * batch_size],
                })
        else:
            train_model = theano.function(
                inputs=[index],
                outputs=cost,
                updates=updates,
                givens={
                    x: train_set_x[index * batch_size:(index + 1) * batch_size],
                    y_mat: train_data_y_mat[index * batch_size:(index + 1) * batch_size],
                })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    result = train_nn(train_model, validate_model, test_model, n_train_batches,
                      n_valid_batches, n_test_batches, n_epochs, verbose,
                      lr_decay)
    # plot_graph(result[2])
    return result
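# --- Illustrative example (not part of the original module) ---
# Standalone sketch of the exponential learning-rate decay schedule described
# in the comments of test_mnist, using the same constants the function uses.
lr_start = 0.01       # matches the default learning_rate
lr_final = 1e-6
decay_horizon = 25    # the code fixes the exponent's denominator at 25 epochs

for epoch in [0, 5, 10, 25]:
    lr = lr_start * (lr_final / lr_start) ** (epoch / decay_horizon)
    print('epoch %2d: lr = %.2e' % (epoch, lr))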
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=100,
             batch_size=128, n_hidden=500, n_hiddenLayers=3,
             activation_function=T.tanh, verbose=False,
             data_path='data/mfcc_songs_10_{}.npy'):
    """
    Wrapper function for training and testing MLP

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
    number of units in each hidden layer, and its length should equal
    n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type activation_function: theano op
    :param activation_function: activation used by the hidden layers.

    :type verbose: boolean
    :param verbose: whether to print an epoch summary.

    :type data_path: string
    :param data_path: path template of the dataset to load.
    """
    # load the dataset; download the dataset if it is not present
    datasets = load_data(data_path=data_path)

    train_set_x, train_set_y = datasets[0][0]
    valid_set_x, valid_set_y = datasets[0][1]
    test_set_x, test_set_y = datasets[0][2]
    n_output_neurons = datasets[1]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP
    classifier = myMLP(rng=rng, input=x, n_in=1200, n_hidden=n_hidden,
                       n_hiddenLayers=n_hiddenLayers, n_out=n_output_neurons,
                       activation_function=activation_function)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y) +
            L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr)

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    # Given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of the same size, where
    # each element is a pair formed from the two lists:
    # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost and at
    # the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    return train_nn(train_model, validate_model, test_model, n_train_batches,
                    n_valid_batches, n_test_batches, n_epochs, verbose)
def test_cifar10(learning_rate=0.01, n_epochs=500, nkerns=[128, 256, 512],
                 filter_shape=3, batch_size=200, verbose=False, normal=False,
                 smaller_set=True, std_normal=2, binary=True,
                 normalization=True, eps=1e-4):
    """
    Wrapper function for testing the LeNet-style network on the CIFAR-10
    dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type verbose: boolean
    :param verbose: whether to print an epoch summary.

    :type binary: boolean
    :param binary: whether to binarize the weights

    :type normalization: boolean
    :param normalization: whether to apply normalization

    :type eps: float
    :param eps: epsilon used by the normalization
    """
    rng = numpy.random.RandomState(23455)

    datasets = load_data_cifar10(theano_shared=True)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # Construct the first convolutional pooling layer
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 3, 32, 32),
        filter_shape=(nkerns[0], 3, filter_shape, filter_shape),
        poolsize=(1, 1),
        normal=normal,
        std_normal=std_normal,
        binary=binary,
        normalization=normalization,
        eps=eps)

    img_size = (32 - filter_shape + 1)
    layer00 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], img_size, img_size),
        filter_shape=(nkerns[0], nkerns[0], filter_shape, filter_shape),
        poolsize=(2, 2),
        normal=normal,
        std_normal=std_normal,
        binary=binary,
        normalization=normalization,
        eps=eps)

    # Construct the second convolutional pooling layer
    img_size = (img_size - filter_shape + 1) // 2
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer00.output,
        image_shape=(batch_size, nkerns[0], img_size, img_size),
        filter_shape=(nkerns[1], nkerns[0], filter_shape, filter_shape),
        poolsize=(1, 1),
        normal=normal,
        std_normal=std_normal,
        binary=binary,
        normalization=normalization,
        eps=eps)

    img_size = (img_size - filter_shape + 1)
    layer11 = LeNetConvPoolLayer(
        rng,
        input=layer1.output,
        image_shape=(batch_size, nkerns[1], img_size, img_size),
        filter_shape=(nkerns[1], nkerns[1], filter_shape, filter_shape),
        poolsize=(2, 2),
        normal=normal,
        std_normal=std_normal,
        binary=binary,
        normalization=normalization,
        eps=eps)

    img_size = (img_size - filter_shape + 1) // 2
    layer2 = LeNetConvPoolLayer(
        rng,
        input=layer11.output,
        image_shape=(batch_size, nkerns[1], img_size, img_size),
        filter_shape=(nkerns[2], nkerns[1], filter_shape, filter_shape),
        poolsize=(2, 2),
        normal=normal,
        std_normal=std_normal,
        binary=binary,
        normalization=normalization,
        eps=eps)

    # img_size = (img_size - filter_shape + 1)
    # layer22 = LeNetConvPoolLayer(
    #     rng,
    #     input=layer2.output,
    #     image_shape=(batch_size, nkerns[2], img_size, img_size),
    #     filter_shape=(nkerns[2], nkerns[2], filter_shape, filter_shape),
    #     poolsize=(2, 2),
    #     normal=normal,
    #     std_normal=std_normal,
    #     binary=binary
    # )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. matrix of rasterized images).
    layer3_input = layer2.output.flatten(2)

    # construct the fully-connected sigmoidal layers
    img_size = (img_size - filter_shape + 1) // 2
    layer3 = HiddenLayer(rng,
                         input=layer3_input,
                         n_in=nkerns[2] * img_size * img_size,
                         n_out=1024,
                         activation=T.nnet.sigmoid,
                         binary=binary,
                         normalization=normalization,
                         epsilon=eps)

    layer5 = HiddenLayer(rng,
                         input=layer3.output,
                         n_in=1024,
                         n_out=1024,
                         activation=T.nnet.sigmoid,
                         binary=binary,
                         normalization=normalization,
                         epsilon=eps)

    layer6 = HiddenLayer(rng,
                         input=layer5.output,
                         n_in=1024,
                         n_out=1024,
                         activation=T.nnet.sigmoid,
                         binary=binary,
                         normalization=normalization,
                         epsilon=eps)

    # classify the values of the fully-connected sigmoidal layer
    layer4 = LogisticRegression(input=layer6.output, n_in=1024, n_out=10,
                                binary=binary)

    # the cost we minimize during training is the NLL of the model
    cost = layer4.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params
    params = params + layer11.params + layer00.params  # + layer22.params
    params = params + layer5.params + layer6.params

    params_bin = layer4.params_bin + layer3.params_bin + layer2.params_bin + layer1.params_bin + layer0.params_bin
    params_bin = params_bin + layer11.params_bin + layer00.params_bin  # + layer22.params_bin
    params_bin = params_bin + layer5.params_bin + layer6.params_bin

    # create a list of gradients for all model parameters; the gradients are
    # taken with respect to the binarized weights
    grads = T.grad(cost, params_bin)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to manually
    # create an update rule for each model parameter. We thus create the
    # updates list by automatically looping over all (params[i], grads[i])
    # pairs, clipping the real-valued weights to [-1, 1].
    updates = [
        # (param_i, param_i - learning_rate * grad_i)
        (param_i,
         T.cast(T.clip(param_i - learning_rate * grad_i, -1, 1),
                theano.config.floatX))
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    result = train_nn(train_model, validate_model, test_model, n_train_batches,
                      n_valid_batches, n_test_batches, n_epochs, verbose)

    return result
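# --- Illustrative example (not part of the original module) ---
# Minimal numpy sketch of the clipped SGD update built in `updates` above:
# w <- clip(w - learning_rate * grad, -1, 1). The toy values are for
# illustration only; in the model this is applied to the real-valued weights
# while gradients are taken with respect to the binarized ones.
import numpy as np

learning_rate = 0.01
w = np.array([0.995, -0.2, 0.5])
grad = np.array([-1.0, 3.0, 0.0])

w = np.clip(w - learning_rate * grad, -1, 1)
print(w)  # [ 1.   -0.23  0.5 ] -- the first weight is clipped at 1.0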
df = fe.read_csv(FILEPATH_X)
df = fe.one_hot_encode(df, ['Formation'])
df = fe.one_hot_encode(df, ['YardLineDirection'])
df = fe.one_hot_encode(df, ['OffenseTeam'])
df = fe.one_hot_encode(df, ['DefenseTeam'])
df = fe.run_pca(df)

labels = fe.read_csv(FILEPATH_Y)
labels = fe.label_encode(labels)

X_train, X_test, y_train, y_test = fe.split_train_test(df, labels, TEST_SIZE)
num_features = fe.get_num_features(X_train)

classifier = nn.build_nn(num_features)
classifier = nn.compile_nn(classifier)
nn.train_nn(classifier, X_train, y_train)
# classifier.save('play_predictor.h5')

# from keras.models import load_model
# import neural_network as nn
# classifier = load_model('play_predictor.h5')
# cm, y_pred = nn.test_nn(classifier, X_test, y_test)
# print(cm)
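# --- Illustrative example (not part of the original module) ---
# A hedged, self-contained sketch of the preprocessing steps the `fe` helpers
# above wrap (one-hot encoding, PCA, label encoding, train/test split), using
# standard pandas/scikit-learn calls on a toy frame. The column names, sizes,
# and component count are illustrative only; the actual fe implementation is
# not shown here and may differ.
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

toy_df = pd.DataFrame({
    'Formation': ['SHOTGUN', 'UNDER CENTER', 'SHOTGUN', 'WILDCAT'],
    'ToGo': [10, 3, 7, 1],
})
toy_labels = pd.Series(['PASS', 'RUSH', 'PASS', 'RUSH'])

toy_df = pd.get_dummies(toy_df, columns=['Formation'])     # one-hot encode
features = PCA(n_components=2).fit_transform(toy_df)       # reduce dimensionality
y = LabelEncoder().fit_transform(toy_labels)               # string labels -> ints

X_tr, X_te, y_tr, y_te = train_test_split(features, y, test_size=0.25)
print(X_tr.shape, X_te.shape)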