Example #1
def __init__(self, input_units, hidden_units, output_units,
             w_initializers=initializers.Xavier(),
             recursive_w_initializers=initializers.Xavier()):
    # TODO: decide how many bias vectors this layer needs (none are defined yet)
    self.w_x = w_initializers((input_units, hidden_units))  # input to hidden
    self.w_h = recursive_w_initializers((hidden_units, hidden_units))  # hidden to hidden
    self.w_y = w_initializers((hidden_units, output_units))  # hidden to output
    self.optimizer = None
    self._trainable = True
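
This constructor allocates the three weight matrices of a vanilla recurrent layer: input-to-hidden (w_x), hidden-to-hidden (w_h), and hidden-to-output (w_y). A minimal forward step consistent with those shapes might look like the sketch below; the tanh activation and the rnn_step helper are assumptions, not part of the source (which, as the TODO notes, defines no biases yet).

import numpy as np

def rnn_step(x_t, h_prev, w_x, w_h, w_y):
    # Hypothetical single-timestep forward pass; no bias terms, matching
    # the constructor above.
    h_t = np.tanh(x_t @ w_x + h_prev @ w_h)  # new hidden state
    y_t = h_t @ w_y                          # hidden-to-output projection
    return h_t, y_t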
def test_overfitting(cifar, momentum):
    # Train and validate on the same 100 images to check that the
    # network can overfit a tiny subset of the data.
    training = cifar.get_named_batches('data_batch_1').subset(100)

    net = Network()
    net.add_layer(
        layers.Linear(cifar.input_size, 50, 0, initializers.Xavier()))
    net.add_layer(layers.ReLU(50))
    net.add_layer(
        layers.Linear(50, cifar.output_size, 0, initializers.Xavier()))
    net.add_layer(layers.Softmax(cifar.output_size))

    opt = MomentumSGD(net, initial_learning_rate=0.005, momentum=momentum)

    opt.train(training, training, 400)  # validate on the training data itself

    costs_accuracies_plot(opt.epoch_nums, opt.acc_train, opt.acc_val,
                          opt.cost_train, opt.cost_val,
                          'images/overfit_mom{}.png'.format(momentum))
    show_plot('images/overfit_mom{}.png'.format(momentum))
def test_vanilla(cifar):
    training = cifar.get_named_batches('data_batch_1')
    validation = cifar.get_named_batches('data_batch_2')

    # Single linear layer followed by softmax (no hidden layer)
    net = Network()
    net.add_layer(
        layers.Linear(cifar.input_size, cifar.output_size, 0,
                      initializers.Xavier()))
    net.add_layer(layers.Softmax(cifar.output_size))

    opt = VanillaSGD(net,
                     initial_learning_rate=0.01,
                     decay_factor=0.99,
                     shuffle=True)

    opt.train(training, validation, 100, 500)  # 100 epochs; the last argument is presumably the batch size

    costs_accuracies_plot(opt.epoch_nums, opt.acc_train, opt.acc_val,
                          opt.cost_train, opt.cost_val,
                          'images/vanilla.png')
    show_plot('images/vanilla.png')
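
VanillaSGD above is configured with an initial_learning_rate and a decay_factor. Assuming the usual exponential schedule (the optimizer implementation is not shown in this snippet), the effective rate per epoch would be:

def learning_rate_at(epoch, initial_learning_rate=0.01, decay_factor=0.99):
    # Assumed schedule: the learning rate shrinks by decay_factor each epoch.
    return initial_learning_rate * decay_factor ** epoch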
Example #4
    def cost_accuracy(self, dataset):
        # Evaluate the whole dataset once and return (cost, accuracy);
        # the enclosing signature is reconstructed from its use below.
        Y = self.evaluate(dataset.images)
        return (self.cost(dataset.one_hot_labels, None, Y),
                self.accuracy(dataset.one_hot_labels, None, Y))


if __name__ == '__main__':
    import layers
    import datasets
    import initializers
    import matplotlib.pyplot as plt

    cifar = datasets.CIFAR10()
    training = cifar.get_named_batches('data_batch_1', limit=4)

    net = Network()
    net.add_layer(layers.Linear(cifar.input_size, 50, 0,
                                initializers.Xavier()))
    net.add_layer(layers.ReLU(50))
    net.add_layer(
        layers.Linear(50, cifar.output_size, 0, initializers.Xavier()))
    net.add_layer(layers.Softmax(cifar.output_size))

    Y = net.evaluate(training.images)
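    # Untrained, a 10-class softmax should sit near chance level:
    # cost close to ln(10) ≈ 2.303 and accuracy around 10%.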
    print('Cost:', net.cost(training.one_hot_labels, None, Y))
    print('Accuracy: {:.2%}'.format(
        net.accuracy(training.one_hot_labels, None, Y)))

    plt.subplot(1, 3, 1)
    plt.imshow(Y)
    plt.yticks(range(10), cifar.labels)
    plt.xlabel('Image number')
    plt.title('Probabilities')
Example #5
def __init__(self, input_units, output_units, w_initializers=initializers.Xavier(),
             biases_initializer=initializers.Zeros()):
    self.weights = w_initializers((input_units, output_units))
    self.biases = biases_initializer((output_units,))  # pass the shape as a tuple
    self.optimizer = None
    self._trainable = True
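
The layer stores an (input_units, output_units) weight matrix and a per-output bias vector. The forward pass such a layer supports is a single affine map; a minimal sketch, assuming row-major batches with one sample per row (the actual forward method is not part of this snippet):

import numpy as np

def linear_forward(X, weights, biases):
    # (batch, input_units) @ (input_units, output_units), then broadcast the bias
    return X @ weights + biases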
Example #6
def create_and_train(training: Batch,
                     validation: Batch,
                     epochs: int,
                     hidden_size: int,
                     regularization: float,
                     initial_learning_rate: float,
                     decay_factor: float,
                     momentum: float,
                     train_id: str,
                     test: Batch = None):
    """
    Create and train a 2 layer network:
    - subtract mean of the training set
    - linear layer
    - relu
    - linear layer
    - softmax

    The only parameters that are fixed are the layer initializers
    and the batch size.

    :param train_id:
    :param training:
    :param validation:
    :param epochs:
    :param hidden_size:
    :param regularization:
    :param initial_learning_rate:
    :param decay_factor:
    :param momentum:
    :return:
    """
    # Mean of the training set
    mu = training.mean()

    # Definition of the network
    net = Network()
    net.add_layer(layers.BatchNormalization(CIFAR10.input_size, mu))
    net.add_layer(layers.Linear(CIFAR10.input_size, hidden_size, regularization, initializers.Xavier()))
    net.add_layer(layers.ReLU(hidden_size))
    net.add_layer(layers.Linear(hidden_size, CIFAR10.output_size, regularization, initializers.Xavier()))
    net.add_layer(layers.Softmax(CIFAR10.output_size))

    # Training (the positional True presumably corresponds to the
    # shuffle flag used by VanillaSGD elsewhere in this project)
    opt = optimizers.MomentumSGD(net, initial_learning_rate, decay_factor, True, momentum)
    opt.train(training, validation, epochs, 10000)

    # Plotting
    plot = costs_accuracies_plot(opt.epoch_nums, opt.acc_train, opt.acc_val,
                                 opt.cost_train, opt.cost_val,
                                 'images/{}.png'.format(train_id))

    result = {
        'epochs': epochs,
        'hidden_size': hidden_size,
        'regularization': regularization,
        'initial_learning_rate': initial_learning_rate,
        'decay_factor': decay_factor,
        'momentum': momentum,
        # 'net': net,
        # 'opt': opt,
        'epoch_nums': opt.epoch_nums,
        'cost_train': opt.cost_train,
        'acc_train': opt.acc_train,
        'cost_val': opt.cost_val,
        'acc_val': opt.acc_val,
        'final_cost_train': opt.cost_train[-1],
        'final_acc_train': opt.acc_train[-1],
        'final_cost_val': opt.cost_val[-1],
        'final_acc_val': opt.acc_val[-1],
        'plot': plot
    }

    # Test set
    if test is not None:
        result['final_cost_test'], result['final_acc_test'] = net.cost_accuracy(test)
        result['confusion_matrix'] = confusion_matrix_plot(net, test,
                                                           CIFAR10().labels,
                                                           'images/{}_conf.png'.format(train_id))

    return result
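
A hypothetical invocation, with illustrative hyperparameter values (not taken from the source), to show how the function is driven:

cifar = CIFAR10()
result = create_and_train(
    training=cifar.get_named_batches('data_batch_1'),
    validation=cifar.get_named_batches('data_batch_2'),
    epochs=20,
    hidden_size=50,
    regularization=0.001,
    initial_learning_rate=0.01,
    decay_factor=0.99,
    momentum=0.9,
    train_id='demo_run')
print('Final validation accuracy: {:.2%}'.format(result['final_acc_val']))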
Example #7
import numpy as np


def print_grad_diff(grad, grad_num, name):
    # Report the largest-magnitude entries of the analytical and numerical
    # gradients (signature and message reconstructed from the call sites below).
    print('{}: max |analytical| = {:.3e}, max |numerical| = {:.3e}'.format(
        name,
        np.abs(grad).max(),
        np.abs(grad_num).max()))


if __name__ == '__main__':
    import layers
    import initializers
    import datasets

    cifar = datasets.CIFAR10()
    training = cifar.get_named_batches('data_batch_1').subset(50)

    # One-layer network with regularization
    net = Network()
    linear = layers.Linear(cifar.input_size, cifar.output_size, 0.2,
                           initializers.Xavier())
    net.add_layer(linear)
    net.add_layer(layers.Softmax(cifar.output_size))

    outputs = net.evaluate(training.images)
    net.backward(training.one_hot_labels)
    cost = net.cost(training.one_hot_labels, outputs=outputs)

    # Weights matrix
    grad_num = compute_grads_for_matrix(training.one_hot_labels,
                                        training.images, linear.W, net, cost)
    print_grad_diff(linear.grad_W, grad_num, 'Grad W')

    # Biases matrix
    grad_num = compute_grads_for_matrix(training.one_hot_labels,
                                        training.images, linear.b, net, cost)
    print_grad_diff(linear.grad_b, grad_num, 'Grad b')
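
compute_grads_for_matrix itself is not shown in this snippet. Since the baseline cost is passed in, a forward-difference check along the following lines would fit the calls above; the body is an assumption, a sketch rather than the project's actual implementation:

import numpy as np

def numerical_grads(one_hot_labels, images, matrix, net, cost, h=1e-6):
    # Hypothetical forward-difference gradient check: perturb each entry of
    # the matrix in place, re-evaluate the cost, compare to the baseline.
    grad = np.zeros_like(matrix)
    for idx in np.ndindex(*matrix.shape):
        original = matrix[idx]
        matrix[idx] = original + h
        cost_plus = net.cost(one_hot_labels, None, net.evaluate(images))
        matrix[idx] = original  # restore the entry
        grad[idx] = (cost_plus - cost) / h
    return grad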