def train(X, Y, nn_architecture, epochs, learning_rate):
    # Initialize weights and biases (seed 2 for reproducibility)
    params_values = init_layers(nn_architecture, 2)
    cost_history = []
    accuracy_history = []

    for i in range(epochs):
        # Forward pass: predictions plus cached activations for backprop
        Y_hat, cache = full_forward_propagation(X, params_values, nn_architecture)

        # Track loss and accuracy on the training set
        cost = get_cost_value(Y_hat, Y)
        cost_history.append(cost)
        accuracy = get_accuracy_value(Y_hat, Y)
        accuracy_history.append(accuracy)

        # Backward pass and gradient-descent parameter update
        grads_values = full_backward_propagation(Y_hat, Y, cache, params_values, nn_architecture)
        params_values = update(params_values, grads_values, nn_architecture, learning_rate)

        print('Cost after iteration {}: {:.5f}, accuracy: {:.5f}'.format(i, cost, accuracy))

    return params_values, cost_history, accuracy_history
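# A minimal, illustrative call to train() above. The architecture format
# (input_dim / output_dim / activation dicts) is an assumption about what
# init_layers and full_forward_propagation expect; the XOR data is a toy
# stand-in, not part of this repo.
def _example_train_usage():
    import numpy as np
    nn_architecture = [
        {'input_dim': 2, 'output_dim': 4, 'activation': 'relu'},
        {'input_dim': 4, 'output_dim': 1, 'activation': 'sigmoid'},
    ]
    # Features as columns (2 x N); labels as a 1 x N row vector.
    X = np.array([[0, 0, 1, 1],
                  [0, 1, 0, 1]], dtype=float)
    Y = np.array([[0, 1, 1, 0]], dtype=float)
    return train(X, Y, nn_architecture, epochs=1000, learning_rate=0.1)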
def main():
    train_data, train_label, test_data, test_label = load_data()

    # 1x3 convolution over the 21x3x1 input, then two fully connected layers
    layers = [
        init_layers('nxm_conv', {
            'filter_height': 1,
            'filter_width': 3,
            'filter_depth': 1,
            'num_filters': 5
        }),
        init_layers('relu', {}),
        init_layers('flatten', {}),
        init_layers('linear', {
            'num_in': 105,
            'num_out': 6
        }),
        init_layers('relu', {}),
        init_layers('linear', {
            'num_in': 6,
            'num_out': 6
        }),
        init_layers('softmax', {})
    ]

    model = init_model(layers, [21, 3, 1], 6, True)
    params = {'test_data': test_data, 'test_labels': test_label}
    model, train_loss = train(model, train_data, train_label, params, 3000)
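# Why num_in is 105 in main() above: with a stride-1 "valid" nxm_conv
# (an assumption about this repo's conv implementation), a 1x3 filter over
# the 21x3x1 input yields a 21x1 map per filter, and 5 filters flatten to
# 21 * 1 * 5 = 105. A quick sanity check of that arithmetic:
def _check_flatten_size():
    in_h, in_w = 21, 3             # model input height and width
    f_h, f_w, n_filt = 1, 3, 5     # nxm_conv hyperparameters
    out_h = in_h - f_h + 1         # 21 (valid conv, stride 1)
    out_w = in_w - f_w + 1         # 1
    assert out_h * out_w * n_filt == 105  # the linear layer's num_in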
import time

import numpy as np
import matplotlib.pyplot as plt


def train_model(use_trained, output_folder, model_name='model.npz',
                input_mean_name='input_mean.npy', plots_suffix='_'):
    # Load training data
    train_data = load_MNIST_images('../data/train-images.idx3-ubyte')
    train_label = load_MNIST_labels('../data/train-labels.idx1-ubyte')
    im_height, im_width, num_channels, num_train = train_data.shape

    # Center the data on the per-pixel mean and save the mean so the same
    # normalization can be applied to the validation/test data.
    input_mean = np.mean(train_data, axis=-1)
    train_data = (train_data.T - input_mean.T).T
    np.save(output_folder + input_mean_name, input_mean)
    make_val_data(input_mean_name, output_folder)

    output_size = int(np.max(train_label) - np.min(train_label) + 1)

    # Hyperparameters
    batch_size = 512
    learning_rate = 0.25
    weight_decay = 0.0001
    lr_decay = 0.98
    lr_decay_step = 2
    momentum_rho = 0.8
    early_stop_ratio = 1e-5
    save_step = 1
    numIters = 300

    if use_trained:
        # Resume from a previously saved model
        model = np.load(output_folder + model_name, allow_pickle=True)
        model = dict(model)
    else:
        # LeNet-style architecture: conv/pool feature extractor followed by
        # three fully connected layers and a softmax output.
        layers = [
            init_layers('conv', {
                'filter_size': 5,
                'filter_depth': num_channels,
                'num_filters': 6,
                'weight_scale': 1,
                'bias_scale': 0.1,
            }),
            init_layers('relu', {}),
            init_layers('pool', {
                'filter_size': 2,
                'stride': 2
            }),
            init_layers('conv', {
                'filter_size': 5,
                'filter_depth': 6,
                'num_filters': 10,
                'weight_scale': 1,
                'bias_scale': 0.1,
            }),
            init_layers('relu', {}),
            init_layers('conv', {
                'filter_size': 3,
                'filter_depth': 10,
                'num_filters': 16,
                'weight_scale': 1,
                'bias_scale': 0.1,
            }),
            init_layers('relu', {}),
            init_layers('pool', {
                'filter_size': 2,
                'stride': 2
            }),
            init_layers('flatten', {}),
            init_layers('linear', {
                'num_in': 3 * 3 * 16,
                'num_out': 120,
            }),
            init_layers('relu', {}),
            init_layers('linear', {
                'num_in': 120,
                'num_out': 84,
                'weight_scale': 1,
            }),
            init_layers('relu', {}),
            init_layers('linear', {
                'num_in': 84,
                'num_out': output_size,
                'weight_scale': 1,
            }),
            init_layers('softmax', {})
        ]
        model = {
            'layers': layers,
            'input_size': [im_height, im_width, num_channels],
            'output_size': output_size
        }

    params = {
        'learning_rate': learning_rate,
        'weight_decay': weight_decay,
        'batch_size': batch_size,
        'save_file': model_name,
        'lr_decay': lr_decay,
        'lr_decay_step': lr_decay_step,
        'momentum_rho': momentum_rho,
        'early_stop_ratio': early_stop_ratio,
        'save_step': save_step,
        'output_folder': output_folder
    }

    start = time.time()
    model, train_loss, train_accuracy, val_loss, val_accuracy = train(
        model, train_data, train_label, params, numIters)
    stop = time.time()
    print('Done training, time used: {:0.1f} min'.format((stop - start) / 60))

    np.save(output_folder + 'training_loss_file', train_loss)
    np.save(output_folder + 'training_accuracy_file', train_accuracy)
    np.save(output_folder + 'val_loss_file', val_loss)
    np.save(output_folder + 'val_accuracy_file', val_accuracy)

    # Plot training and validation loss curves
    train_iter_range = np.array(range(1, numIters + 1))
    val_iter_range = np.array(range(save_step, numIters + 1, save_step))
    fig, ax = plt.subplots()
    ax.plot(train_iter_range, train_loss, 'b', label='training')
    ax.plot(val_iter_range, val_loss, 'r', label='testing')
    plt.xlabel('iteration')
    plt.ylabel('loss')
    ax.legend(loc='upper right', shadow=True)
    plt.title('training and testing losses vs. number of iterations')
    # plt.show()
    plt.savefig(output_folder + 'plot_losses' + plots_suffix + '.png')

    # Plot training and validation accuracy curves
    fig, ax = plt.subplots()
    ax.plot(train_iter_range, train_accuracy, 'b', label='training')
    ax.plot(val_iter_range, val_accuracy, 'r', label='testing')
    plt.xlabel('iteration')
    plt.ylabel('accuracy')
    ax.legend(loc='upper left', shadow=True)
    plt.title('training and testing accuracies vs. number of iterations')
    # plt.show()
    plt.savefig(output_folder + 'plot_accuracies' + plots_suffix + '.png')
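# Why num_in is 3 * 3 * 16 in train_model() above: a hedged shape trace,
# assuming 28x28x1 MNIST inputs, stride-1 "valid" convolutions
# (out = in - filter + 1), and non-overlapping 2x2 pooling (out = in // 2),
# which is what makes the flatten size come out to 144.
def _check_lenet_flatten_size():
    h = w = 28                     # MNIST image size
    h, w = h - 5 + 1, w - 5 + 1    # conv 5x5 -> 24x24x6
    h, w = h // 2, w // 2          # pool 2/2 -> 12x12x6
    h, w = h - 5 + 1, w - 5 + 1    # conv 5x5 -> 8x8x10
    h, w = h - 3 + 1, w - 3 + 1    # conv 3x3 -> 6x6x16
    h, w = h // 2, w // 2          # pool 2/2 -> 3x3x16
    assert h * w * 16 == 144       # the flatten / linear layer's num_in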