def test_mnist():
    (train_x, train_y), (test_x, test_y) = mnist.load_data()

    # Hold out the last 10,000 training samples for validation
    val_x = train_x[50000:]
    val_y = train_y[50000:]
    train_x = train_x[:50000]
    train_y = train_y[:50000]

    batch_size = 200

    model = models.Sequential()
    model.add(layers.Linear(28, input_shape=(None, train_x.shape[1])))
    model.add(layers.ReLU())
    model.add(layers.Linear(10))
    model.add(layers.ReLU())
    model.add(layers.Linear(10))
    model.add(layers.Softmax())

    acc = losses.categorical_accuracy.__name__
    model.compile(losses.CrossEntropy(),
                  optimizers.SGD(lr=0.001),
                  metrics=[losses.categorical_accuracy])
    model.summary()

    history = model.train(train_x, train_y, batch_size, epochs=32,
                          validation_data=(val_x, val_y))

    epochs = range(1, len(history["loss"]) + 1)
    plt.plot(epochs, history["loss"], 'ro', label="Training loss")
    plt.plot(epochs, history["val_loss"], 'go', label="Validation loss")
    plt.plot(epochs, history[acc], 'r', label="Training accuracy")
    plt.plot(epochs, history["val_" + acc], 'g', label="Validation accuracy")
    plt.title('Training/Validation loss/accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Loss/Accuracy')
    plt.legend()
    plt.show(block=True)
def __init__(self, input_size, output_size, hidden_layers_sizes,
             loss=mtr.CrossEntropyLoss(), learn_rate=0.01,
             problem='classification', scorer=mtr.AccuracyScore()):
    """
    Create a multi-layer perceptron.

    Args:
        input_size: number of neurons in the input layer
        output_size: number of neurons in the output layer
        hidden_layers_sizes: list with the number of neurons in each hidden layer
        loss: loss function to minimize
        learn_rate: learning rate used by the dense layers
        problem: problem to solve ('classification' or 'regression')
        scorer: metric used to score the model
    """
    layer_sizes = [input_size] + hidden_layers_sizes + [output_size]

    # Alternate Dense and LeakyReLU layers for every hidden layer
    layers = []
    for i in range(len(layer_sizes) - 2):
        layers.append(
            lrs.Dense(layer_sizes[i], layer_sizes[i + 1], learn_rate))
        layers.append(lrs.LeakyReLU())
    layers.append(lrs.Dense(layer_sizes[-2], layer_sizes[-1], learn_rate))

    # Classification ends with a Softmax; regression keeps the raw output
    if problem == 'classification':
        layers.append(lrs.Softmax())

    super().__init__(layers, loss, scorer)
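# Hedged usage sketch (not part of the original source). The class that owns
# this __init__ is not shown above, so `MLP` below is a hypothetical name for
# it; `mtr` and `lrs` are assumed to be the metrics and layers modules used by
# that class. For a 2-feature, 5-class problem with two hidden layers:
#
#     mlp = MLP(input_size=2, output_size=5, hidden_layers_sizes=[32, 16],
#               learn_rate=0.01, problem='classification')
#
# This would build Dense(2, 32) -> LeakyReLU -> Dense(32, 16) -> LeakyReLU
# -> Dense(16, 5) -> Softmax, trained with the default CrossEntropyLoss.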
def get_experiment_metrics(input_transform, output_transform):
    name_in = input_transform.__class__.__name__ if input_transform is not None else ""
    name_out = output_transform.__class__.__name__ if output_transform is not None else ""

    # Relative (compositional) metrics always compare percentages of the true
    # values; the predictions only get an extra transform when the model does
    # not already produce relative outputs.
    relative_in_transform = Percentage()
    relative_out_transform = None
    if name_in == "":
        relative_out_transform = Percentage()
    elif name_in == CenterLogRatio.__name__ and name_out != layers.Softmax.__name__:
        relative_out_transform = layers.Softmax()

    # Two separate (but identical) metric lists are returned, each with fresh
    # instances so that stateful metric objects are not shared, plus a plain
    # MAE used on its own.
    return [
        [
            MeanSquaredErrorWrapper(y_true_transformer=input_transform,
                                    y_pred_transformer=None),
            MeanAbsoluteErrorWrapper(y_true_transformer=input_transform,
                                     y_pred_transformer=None),
            MeanAbsolutePercentageErrorWrapper(
                y_true_transformer=relative_in_transform,
                y_pred_transformer=relative_out_transform),
            BrayCurtisDissimilarity(y_true_transformer=relative_in_transform,
                                    y_pred_transformer=relative_out_transform),
            PearsonCorrelation(y_true_transformer=relative_in_transform,
                               y_pred_transformer=relative_out_transform),
            # SpearmanCorrelation(y_true_transformer=relative_in_transform,
            #                     y_pred_transformer=relative_out_transform),
            JensenShannonDivergence(y_true_transformer=relative_in_transform,
                                    y_pred_transformer=relative_out_transform),
            # CrossEntropy(y_true_transformer=relative_in_transform,
            #              y_pred_transformer=relative_out_transform),
        ],
        [
            MeanSquaredErrorWrapper(y_true_transformer=input_transform,
                                    y_pred_transformer=None),
            MeanAbsoluteErrorWrapper(y_true_transformer=input_transform,
                                     y_pred_transformer=None),
            MeanAbsolutePercentageErrorWrapper(
                y_true_transformer=relative_in_transform,
                y_pred_transformer=relative_out_transform),
            BrayCurtisDissimilarity(y_true_transformer=relative_in_transform,
                                    y_pred_transformer=relative_out_transform),
            PearsonCorrelation(y_true_transformer=relative_in_transform,
                               y_pred_transformer=relative_out_transform),
            # SpearmanCorrelation(y_true_transformer=relative_in_transform,
            #                     y_pred_transformer=relative_out_transform),
            JensenShannonDivergence(y_true_transformer=relative_in_transform,
                                    y_pred_transformer=relative_out_transform),
            # CrossEntropy(y_true_transformer=relative_in_transform,
            #              y_pred_transformer=relative_out_transform),
        ],
        [
            metrics.MeanAbsoluteError(name='mae'),
        ]
    ]
def test_overfitting(cifar, momentum):
    training = cifar.get_named_batches('data_batch_1').subset(100)

    net = Network()
    net.add_layer(
        layers.Linear(cifar.input_size, 50, 0, initializers.Xavier()))
    net.add_layer(layers.ReLU(50))
    net.add_layer(
        layers.Linear(50, cifar.output_size, 0, initializers.Xavier()))
    net.add_layer(layers.Softmax(cifar.output_size))

    opt = MomentumSGD(net, initial_learning_rate=0.005, momentum=momentum)
    opt.train(training, training, 400)

    costs_accuracies_plot(opt.epoch_nums, opt.acc_train, opt.acc_val,
                          opt.cost_train, opt.cost_val,
                          'images/overfit_mom{}.png'.format(momentum))
    show_plot('images/overfit_mom{}.png'.format(momentum))
def test_vanilla(cifar):
    training = cifar.get_named_batches('data_batch_1')
    validation = cifar.get_named_batches('data_batch_2')

    net = Network()
    net.add_layer(
        layers.Linear(cifar.input_size, cifar.output_size, 0,
                      initializers.Xavier()))
    net.add_layer(layers.Softmax(cifar.output_size))

    opt = VanillaSGD(net,
                     initial_learning_rate=0.01,
                     decay_factor=0.99,
                     shuffle=True)
    opt.train(training, validation, 100, 500)

    costs_accuracies_plot(opt.epoch_nums, opt.acc_train, opt.acc_val,
                          opt.cost_train, opt.cost_val, 'images/vanilla.png')
    show_plot('images/vanilla.png')
if __name__ == '__main__':
    import layers
    import datasets
    import initializers
    import numpy as np
    import matplotlib.pyplot as plt

    cifar = datasets.CIFAR10()
    training = cifar.get_named_batches('data_batch_1', limit=4)

    net = Network()
    net.add_layer(
        layers.Linear(cifar.input_size, 50, 0, initializers.Xavier()))
    net.add_layer(layers.ReLU(50))
    net.add_layer(
        layers.Linear(50, cifar.output_size, 0, initializers.Xavier()))
    net.add_layer(layers.Softmax(cifar.output_size))

    Y = net.evaluate(training.images)
    print('Cost:', net.cost(training.one_hot_labels, None, Y))
    print('Accuracy: {:.2%}'.format(
        net.accuracy(training.one_hot_labels, None, Y)))

    plt.subplot(1, 3, 1)
    plt.imshow(Y)
    plt.yticks(range(10), cifar.labels)
    plt.xlabel('Image number')
    plt.title('Probabilities')

    plt.subplot(1, 3, 2)
    plt.imshow(cifar.label_encoder.transform(np.argmax(Y, axis=0)).T)
    plt.yticks([])
batch_size = 50
num_epochs = 100
num_classes = 2
hidden_units = 100
hidden_units2 = 10
dimensions = 2

# Datasets: PeaksData, SwissRollData, GMMData
X_train, y_train, X_test, y_test = utils.get_data('PeaksData')
X_train, y_train = shuffle(X_train, y_train)

# gradient and jacobian tests
grad_test_W(X_train, y_train)
grad_test_b(X_train, y_train)
jacobian_test_W(X_train, y_train)
jacobian_test_b(X_train, y_train)
grad_test_W_whole_network(X_train, y_train)
grad_test_b_whole_network(X_train, y_train)

model = models.MyNeuralNetwork()
model.add(layers.Linear(dimensions, hidden_units))
model.add(activations.ReLU())
model.add(layers.Softmax(hidden_units, 5))

optimizer = optimizers.SGD(model.parameters, lr=0.1)
losses, train_accuracy, test_accuracy = model.fit(X_train, y_train,
                                                  X_test, y_test,
                                                  batch_size, num_epochs,
                                                  optimizer)

# plotting
utils.plot_scores(train_accuracy, test_accuracy)
def create_and_train(training: Batch, validation: Batch, epochs: int,
                     hidden_size: int, regularization: float,
                     initial_learning_rate: float, decay_factor: float,
                     momentum: float, train_id: str, test: Batch = None):
    """
    Create and train a two-layer network:

    - subtract the mean of the training set
    - linear layer
    - relu
    - linear layer
    - softmax

    The only parameters that are fixed are the layer initializers and the
    batch size.

    :param training: training set
    :param validation: validation set used to track cost and accuracy
    :param epochs: number of training epochs
    :param hidden_size: number of units in the hidden layer
    :param regularization: regularization strength of the linear layers
    :param initial_learning_rate: initial learning rate of momentum SGD
    :param decay_factor: learning rate decay factor
    :param momentum: momentum coefficient
    :param train_id: identifier used to name the output plots
    :param test: optional test set evaluated after training
    :return: a dict with the hyperparameters, training curves and final scores
    """
    # Mean of the training set
    mu = training.mean()

    # Definition of the network
    net = Network()
    net.add_layer(layers.BatchNormalization(CIFAR10.input_size, mu))
    net.add_layer(layers.Linear(CIFAR10.input_size, hidden_size,
                                regularization, initializers.Xavier()))
    net.add_layer(layers.ReLU(hidden_size))
    net.add_layer(layers.Linear(hidden_size, CIFAR10.output_size,
                                regularization, initializers.Xavier()))
    net.add_layer(layers.Softmax(CIFAR10.output_size))

    # Training
    opt = optimizers.MomentumSGD(net, initial_learning_rate, decay_factor,
                                 True, momentum)
    opt.train(training, validation, epochs, 10000)

    # Plotting
    plot = costs_accuracies_plot(opt.epoch_nums, opt.acc_train, opt.acc_val,
                                 opt.cost_train, opt.cost_val,
                                 'images/{}.png'.format(train_id))

    result = {
        'epochs': epochs,
        'hidden_size': hidden_size,
        'regularization': regularization,
        'initial_learning_rate': initial_learning_rate,
        'decay_factor': decay_factor,
        'momentum': momentum,
        # 'net': net,
        # 'opt': opt,
        'epoch_nums': opt.epoch_nums,
        'cost_train': opt.cost_train,
        'acc_train': opt.acc_train,
        'cost_val': opt.cost_val,
        'acc_val': opt.acc_val,
        'final_cost_train': opt.cost_train[-1],
        'final_acc_train': opt.acc_train[-1],
        'final_cost_val': opt.cost_val[-1],
        'final_acc_val': opt.acc_val[-1],
        'plot': plot
    }

    # Test set
    if test is not None:
        result['final_cost_test'], result['final_acc_test'] = \
            net.cost_accuracy(test)
        result['confusion_matrix'] = confusion_matrix_plot(
            net, test, CIFAR10().labels,
            'images/{}_conf.png'.format(train_id))

    return result
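# Hedged usage sketch (not part of the original source): a possible call to
# create_and_train, assuming a CIFAR10 dataset object that exposes
# get_named_batches() as in the other snippets in this file. The
# hyperparameter values are illustrative only, not tuned settings.
#
#     cifar = CIFAR10()
#     training = cifar.get_named_batches('data_batch_1')
#     validation = cifar.get_named_batches('data_batch_2')
#     result = create_and_train(training, validation, epochs=20,
#                               hidden_size=50, regularization=0.001,
#                               initial_learning_rate=0.01, decay_factor=0.99,
#                               momentum=0.9, train_id='baseline')
#     print(result['final_acc_val'])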
def __init__(self, *args):
    self.layer = layers.Softmax()
    self.inputs = args
def grad_test_b(X_train, y_train):
    softmax_in = 2
    softmax_out = 5
    model = models.MyNeuralNetwork()
    model.add(layers.Softmax(softmax_in, softmax_out))
    model.init()
    for p in model.parameters:
        p.grad = 0.

    # Perturbation direction d and geometrically decreasing step sizes eps
    eps0 = 1
    eps = np.array([(0.5**i) * eps0 for i in range(10)])
    d = np.random.random((1, 5))
    d = d / np.sum(d)
    x_data = np.array([X_train[0]])
    x_label = np.array([y_train[0]])

    # Zeroth-order check: |f(x + eps*d) - f(x)| should shrink linearly with eps
    grad_diff = []
    for epss in eps:
        model_grad = copy.deepcopy(model)
        probabilities_grad = model_grad.forward(x_data)
        model2 = copy.deepcopy(model)
        model2.graph[0].bias.data += d * epss
        probabilities_grad2 = model2.forward(x_data)
        grad_diff.append(
            np.abs(
                utils.cross_entropy_loss(probabilities_grad2, x_label) -
                utils.cross_entropy_loss(probabilities_grad, x_label)))

    fig, axs = plt.subplots(2, 2, figsize=(12, 8), constrained_layout=True)
    fig.suptitle('Gradient test by b', fontsize=16)
    axs[0, 0].plot(eps, grad_diff)
    axs[0, 0].set_xlabel(r'$\epsilon$')
    axs[0, 0].set_title(r'$|f(x+\epsilon d) - f(x)|$')
    axs[0, 1].plot(
        range(len(grad_diff) - 1),
        [grad_diff[i + 1] / grad_diff[i] for i in range(len(grad_diff) - 1)])
    axs[0, 1].set_xlabel('$i$')
    axs[0, 1].set_title('rate of decrease')
    axs[0, 1].set_ylim([0, 1])

    # First-order check: subtracting eps * d^T grad(b) should make the
    # difference shrink quadratically with eps
    grad_diff = []
    for epss in eps:
        model_grad = copy.deepcopy(model)
        probabilities_grad = copy.deepcopy(model_grad.forward(x_data))
        model2 = copy.deepcopy(model)
        model2.graph[0].bias.data += d * epss
        probabilities_grad2 = copy.deepcopy(model2.forward(x_data))
        model2.backward(x_label)
        grad_x = model2.graph[0].bias.grad
        grad_diff.append(
            np.abs(
                utils.cross_entropy_loss(probabilities_grad2, x_label) -
                utils.cross_entropy_loss(probabilities_grad, x_label) -
                epss * np.dot(d.flatten().T, grad_x.flatten())))

    axs[1, 0].plot(eps, grad_diff)
    axs[1, 0].set_xlabel(r'$\epsilon$')
    axs[1, 0].set_title(r'$|f(x+\epsilon d) - f(x) - \epsilon d^{T} grad(x)|$')
    axs[1, 1].plot(
        range(len(grad_diff) - 1),
        [grad_diff[i + 1] / grad_diff[i] for i in range(len(grad_diff) - 1)])
    axs[1, 1].set_xlabel('$i$')
    axs[1, 1].set_title('rate of decrease')
    axs[1, 1].set_ylim([0, 1])
    plt.show()
def jacobian_test_b(X_train, y_train):
    softmax_in = 2
    softmax_out = 5
    hidden_units = 10
    model = models.MyNeuralNetwork()
    model.add(layers.Linear(softmax_in, hidden_units))
    model.add(activations.Tanh())
    model.add(layers.Softmax(hidden_units, softmax_out))
    model.init()
    for p in model.parameters:
        p.grad = 0.

    # Perturbation direction d and geometrically decreasing step sizes eps
    eps0 = 1
    eps = np.array([(0.5**i) * eps0 for i in range(10)])
    d = np.random.random((1, 10))
    d = d / np.sum(d)
    x_data = np.array([X_train[0]])
    x_label = np.array([y_train[0]])

    # Zeroth-order check: ||f(x + eps*d) - f(x)|| should shrink linearly with eps,
    # where f is the Tanh activation output and the perturbation is on the bias
    grad_diff = []
    for epss in eps:
        model_grad = copy.deepcopy(model)
        probabilities_grad = model_grad.forward(x_data)
        model2 = copy.deepcopy(model)
        model2.graph[0].bias.data += d * epss
        probabilities_grad2 = model2.forward(x_data)
        f_x_eps_d = model2.graph[1].activation_output
        f_x = model_grad.graph[1].activation_output
        grad_diff.append(LA.norm(f_x_eps_d - f_x))

    fig, axs = plt.subplots(2, 2, figsize=(12, 8), constrained_layout=True)
    fig.suptitle('Jacobian test by b', fontsize=16)
    axs[0, 0].plot(eps, grad_diff)
    axs[0, 0].set_xlabel(r'$\epsilon$')
    axs[0, 0].set_title(r'$||f(x+\epsilon d) - f(x)||$')
    axs[0, 1].plot(
        range(len(grad_diff) - 1),
        [grad_diff[i + 1] / grad_diff[i] for i in range(len(grad_diff) - 1)])
    axs[0, 1].set_xlabel('$i$')
    axs[0, 1].set_title('rate of decrease')
    axs[0, 1].set_ylim([0, 1])

    # First-order check: the remainder after subtracting the Jacobian-vector
    # product JacMV should shrink faster (second order in eps)
    grad_diff = []
    for epss in eps:
        model_grad = copy.deepcopy(model)
        probabilities_grad = copy.deepcopy(model_grad.forward(x_data))
        model2 = copy.deepcopy(model)
        model2.graph[0].bias.data += d * epss
        probabilities_grad2 = copy.deepcopy(model2.forward(x_data))
        model_grad.backward(x_label)
        f_x_eps_d = model2.graph[1].activation_output
        f_x = model_grad.graph[1].activation_output
        grad = model_grad.graph[0].bias.grad
        JacMV = epss * np.matmul(d.T, grad)
        diff = LA.norm(f_x_eps_d - f_x - JacMV)
        grad_diff.append(diff * epss)

    axs[1, 0].plot(eps, grad_diff)
    axs[1, 0].set_xlabel(r'$\epsilon$')
    axs[1, 0].set_title(r'$||f(x+\epsilon d) - f(x) - JacMV(x, \epsilon d)||$')
    axs[1, 1].plot(
        range(len(grad_diff) - 1),
        [grad_diff[i + 1] / grad_diff[i] for i in range(len(grad_diff) - 1)])
    axs[1, 1].set_xlabel('$i$')
    axs[1, 1].set_title('rate of decrease')
    axs[1, 1].set_ylim([0, 1])
    plt.show()