Example #1
def get_model(X, batch_size, image_dimension):

    input_shape = (batch_size, 3, image_dimension, image_dimension)
    all_parameters = []

    #############################################
    # a first convolution with 32 (3, 3) filters
    output, output_test, params, output_shape = convolutional(
        X, X, input_shape, 32, (3, 3))
    all_parameters += params

    # maxpool with size=(2, 2)
    output, output_test, params, output_shape = maxpool(
        output, output_test, output_shape, (2, 2))

    # relu activation
    output, output_test, params, output_shape = activation(
        output, output_test, output_shape, 'relu')

    #############################################
    # a second convolution with 32 (5, 5) filters
    output, output_test, params, output_shape = convolutional(
        output, output_test, output_shape, 32, (5, 5))
    all_parameters += params

    # maxpool with size=(2, 2)
    output, output_test, params, output_shape = maxpool(
        output, output_test, output_shape, (2, 2))

    # relu activation
    output, output_test, params, output_shape = activation(
        output, output_test, output_shape, 'relu')

    #############################################
    # MLP first layer

    output = output.flatten(2)
    output_test = output_test.flatten(2)

    output, output_test, params, output_shape = linear(
        output, output_test,
        (output_shape[0], output_shape[1] * output_shape[2] * output_shape[3]),
        500)
    all_parameters += params

    output, output_test, params, output_shape = activation(
        output, output_test, output_shape, 'relu')

    #############################################
    # MLP second layer

    output, output_test, params, output_shape = linear(output, output_test,
                                                       output_shape, 1)
    all_parameters += params

    output, output_test, params, output_shape = activation(
        output, output_test, output_shape, 'sigmoid')

    #
    return output, output_test, all_parameters
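
These Theano-style helpers (convolutional, maxpool, activation, linear) all share one calling convention: each takes the train-time and test-time symbolic outputs plus the current shape, and returns an (output, output_test, params, output_shape) tuple. The helpers themselves are not shown; below is a minimal NumPy sketch of the convention only, using a hypothetical stand-in for activation, not the original implementation.

import numpy as np

def activation(output, output_test, output_shape, kind):
    """Hypothetical stand-in illustrating the four-tuple convention:
    apply the same nonlinearity to the train and test paths; activations
    add no trainable parameters, so params is an empty list."""
    fns = {
        'relu': lambda a: np.maximum(a, 0.0),
        'sigmoid': lambda a: 1.0 / (1.0 + np.exp(-a)),
    }
    fn = fns[kind]
    return fn(output), fn(output_test), [], output_shape

# Tiny smoke test on a (batch, channels, height, width) array.
x = np.random.randn(2, 3, 4, 4)
out, out_test, params, shape = activation(x, x, x.shape, 'relu')
assert out.min() >= 0.0 and params == [] and shape == x.shape
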
Example #2
def get_model(X, batch_size, image_dimension):

	input_shape = (batch_size, 3, image_dimension, image_dimension)
	all_parameters = []

	#############################################
	# a first convolution with 32 (3, 3) filters
	output, output_test, params, output_shape = convolutional(X, X, input_shape, 32, (3, 3))
	all_parameters += params

	# maxpool with size=(2, 2)
	output, output_test, params, output_shape = maxpool(output, output_test, output_shape, (2, 2))

	# relu activation
	output, output_test, params, output_shape = activation(output, output_test, output_shape, 'relu')

	#############################################
	# a second convolution with 32 (3, 3) filters
	output, output_test, params, output_shape = convolutional(output, output_test, output_shape, 32, (3, 3))
	all_parameters += params

	# maxpool with size=(2, 2)
	output, output_test, params, output_shape = maxpool(output, output_test, output_shape, (2, 2))

	# relu activation
	output, output_test, params, output_shape = activation(output, output_test, output_shape, 'relu')
	
	#############################################
	# a third convolution with 32 (3, 3) filters
	output, output_test, params, output_shape = convolutional(output, output_test, output_shape, 32, (3, 3))
	all_parameters += params

	# maxpool with size=(2, 2)
	output, output_test, params, output_shape = maxpool(output, output_test, output_shape, (2, 2))

	# relu activation
	output, output_test, params, output_shape = activation(output, output_test, output_shape, 'relu')

	#############################################
	# MLP first layer

	output = output.flatten(2)
	output_test = output_test.flatten(2)
	
	output, output_test, params, output_shape = linear(output, output_test, (output_shape[0], output_shape[1]*output_shape[2]*output_shape[3]), 500)
	all_parameters += params

	output, output_test, params, output_shape = activation(output, output_test, output_shape, 'relu')

	#############################################
	# MLP second layer

	output, output_test, params, output_shape = linear(output, output_test, output_shape, 1)
	all_parameters += params

	output, output_test, params, output_shape = activation(output, output_test, output_shape, 'sigmoid')

	#
	return output, output_test, all_parameters
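
This variant adds a third convolution but keeps the same flatten-then-linear step: output.flatten(2) collapses everything after the batch axis, so the first MLP layer's input width is output_shape[1] * output_shape[2] * output_shape[3]. Below is a small sketch of that shape arithmetic, assuming 'valid' convolutions, non-overlapping (2, 2) pooling and a hypothetical 32x32 input; the unshown helpers may pad or stride differently.

def conv_out(size, kernel):
    return size - kernel + 1              # 'valid' convolution

def pool_out(size, window):
    return size // window                 # non-overlapping pooling

size = 32                                 # hypothetical 32x32 input
for kernel in (3, 3, 3):                  # the three (3, 3) convolutions
    size = pool_out(conv_out(size, kernel), 2)

channels = 32
flat = channels * size * size             # first MLP layer's input width
print(flat)                               # 32 * 2 * 2 = 128
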
Example #3
    def _build(self, num_classifiers, learning_rate):
        # inputs
        self.X = tf.placeholder(tf.float32, [None, 28, 28])
        self.y = tf.placeholder(tf.int32, [None])
        one_hot_y = tf.one_hot(self.y, 10)

        networks = [
            layers.convolutional(self.X) for _ in range(num_classifiers)
        ]
        self.individual_loss = [
            layers.loss(net, one_hot_y) for net in networks
        ]
        self.individual_accuracy = [
            layers.accuracy(net, one_hot_y) for net in networks
        ]

        logits = tf.reduce_mean(tf.stack(networks, axis=-1), axis=-1)

        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits, labels=one_hot_y)
        self.loss = tf.reduce_mean(cross_entropy)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        self.train_op = optimizer.minimize(self.loss)

        correct_prediction = tf.equal(tf.argmax(logits, axis=1),
                                      tf.argmax(one_hot_y, axis=1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        self.prediction = tf.argmax(logits, axis=1)
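
The ensemble's logits here are the mean of the member networks' logits, taken before the shared softmax cross-entropy. Below is a minimal NumPy sketch of just that averaging step; the project-specific layers.* helpers are not shown.

import numpy as np

# Stack per-classifier logits along a new trailing axis and average them,
# mirroring tf.reduce_mean(tf.stack(networks, axis=-1), axis=-1).
rng = np.random.default_rng(0)
num_classifiers, batch, classes = 3, 4, 10
member_logits = [rng.normal(size=(batch, classes))
                 for _ in range(num_classifiers)]

stacked = np.stack(member_logits, axis=-1)   # (batch, classes, members)
ensemble_logits = stacked.mean(axis=-1)      # (batch, classes)
prediction = ensemble_logits.argmax(axis=1)  # like tf.argmax(logits, axis=1)
print(prediction.shape)                      # (4,)
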
Example #4
def get_model(X, batch_size, image_dimension):

	input_shape = (batch_size, 3, image_dimension[0], image_dimension[1])
	all_parameters = []
	acc_parameters = []

	#############################################
	# a first convolution with 64 (3, 3) filters
	output, output_test, params, output_shape = convolutional(X, X, input_shape, 64, (3, 3))
	all_parameters += params

	# maxpool with size=(2, 2)
	output, output_test, params, output_shape = maxpool(output, output_test, output_shape, (2, 2))

	# relu activation
	output, output_test, params, output_shape = activation(output, output_test, output_shape, 'relu')

	#############################################
	# a second convolution with 128 (3, 3) filters
	output, output_test, params, output_shape = convolutional(output, output_test, output_shape, 128, (3, 3))
	all_parameters += params

	# maxpool with size=(2, 2)
	output, output_test, params, output_shape = maxpool(output, output_test, output_shape, (2, 2))

	# relu activation
	output, output_test, params, output_shape = activation(output, output_test, output_shape, 'relu')
	
	#############################################
	# 2 convolutional layers with 256 (3, 3) filters
	output, output_test, params, output_shape = convolutional(output, output_test, output_shape, 256, (3, 3))
	all_parameters += params
	output, output_test, params, output_shape = activation(output, output_test, output_shape, 'relu')
	output, output_test, params, output_shape = convolutional(output, output_test, output_shape, 256, (3, 3))
	all_parameters += params

	# maxpool with size=(2, 2)
	output, output_test, params, output_shape = maxpool(output, output_test, output_shape, (2, 2))

	# relu activation
	output, output_test, params, output_shape = activation(output, output_test, output_shape, 'relu')

	#############################################
	# Fully connected
	output, output_test, params, output_shape = convolutional(output, output_test, output_shape, 1024, (1, 1))
	all_parameters += params
	output, output_test, params, output_shape = activation(output, output_test, output_shape, 'relu')
	output, output_test, params, output_shape = convolutional(output, output_test, output_shape, 1024, (1, 1))
	all_parameters += params

	# avgpool with size=(4, 4) and fully connected
	output, output_test, params, output_shape = avgpool(output, output_test, output_shape, (4, 4))
	output, output_test, params, output_shape = convolutional(output, output_test, output_shape, 10, (1, 1))
	all_parameters += params

	output, output_test, params, output_shape, cacc_parameters = batch_norm(output, output_test, output_shape)
	acc_parameters += cacc_parameters
	all_parameters += params

	# softmax
	output = multi_dim_softmax(output)
	output_test = multi_dim_softmax(output_test)

	#
	return output, output_test, all_parameters, acc_parameters
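
Unlike the earlier examples, this head is fully convolutional: the dense layers are replaced by (1, 1) convolutions plus average pooling. A (1, 1) convolution is just a per-position linear map over channels; below is a minimal NumPy sketch of that equivalence with hypothetical shapes.

import numpy as np

# A (1, 1) convolution applies the same (C_out, C_in) weight at every
# spatial position, which is exactly a dense layer over the channel axis.
rng = np.random.default_rng(0)
x = rng.normal(size=(2, 1024, 4, 4))        # (batch, channels, h, w)
W = rng.normal(size=(10, 1024))             # maps 1024 channels -> 10

conv1x1 = np.einsum('oc,bchw->bohw', W, x)  # the 1x1 convolution

# Same result via flattening spatial positions and a plain matmul.
dense = (W @ x.reshape(2, 1024, 16)).reshape(2, 10, 4, 4)
assert np.allclose(conv1x1, dense)
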
Example #5
def main():
    """
    Train and evaluate the model with the chosen parameters.
    """

    test_images, test_labels = mnist.load('t10k-images.idx3-ubyte',
                                          't10k-labels.idx1-ubyte')
    train_images, train_labels = mnist.load('train-images.idx3-ubyte',
                                            'train-labels.idx1-ubyte')

    test_images = test_images.reshape(test_images.shape[0],
                                      test_images.shape[1],
                                      test_images.shape[2], 1)
    train_images = train_images.reshape(train_images.shape[0],
                                        train_images.shape[1],
                                        train_images.shape[2], 1)

    train_dataset = dataset.Dataset(train_images, train_labels,
                                    args.batch_size)

    # Used for evaluating on the training and testing sets, since the GPU cannot
    # hold the full training/testing sets in memory.
    test_train_dataset = dataset.Dataset(train_images, train_labels, 1000)
    test_dataset = dataset.Dataset(test_images, test_labels, 1000)

    with tf.device('/gpu:0'):
        if args.cnn:
            x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
        else:
            x = tf.placeholder(tf.float32, shape=[None, 784])

        y = tf.placeholder(tf.float32, shape=[None, 10])

        # Convolutional layers if requested, otherwise fully connected layers.
        if args.cnn:

            kernel_size = [3, 3, 3, 3, 3, 3, 3]

            channels = [16]
            for i in range(1, args.layers):
                channels.append(max(channels[-1] * 2, 64))

            #if args.layers % 2 == 0:
            #    channels[args.layers//2 - 1] = 128
            #    channels[args.layers//2] = 128;

            #    for i in range(args.layers//2 - 1, -1, -1):
            #        channels[i] = channels[i + 1]//2
            #        channels[args.layers - i - 1] = channels[args.layers - i - 2]//2
            #else:
            #    channels[args.layers//2] = 128

            #    for i in range(args.layers//2 - 1, -1, -1):
            #        channels[i] = channels[i + 1]//2
            #        channels[args.layers - i - 1] = channels[args.layers - i - 2]//2

            # weights and biases
            W = []
            b = []

            # convolutions, activations, pooling and normalization
            s = []
            h = []
            p = []
            n = [x]

            for i in range(args.layers):

                channels_in = 1
                if i > 0:
                    channels_in = channels[i - 1]

                W.append(
                    initializer('W_conv' + str(i), [
                        kernel_size[i], kernel_size[i], channels_in,
                        channels[i]
                    ]))
                b.append(
                    initializers.constant('b_conv' + str(i), [channels[i]],
                                          value=args.bias))

                s.append(
                    layers.convolutional('s_' + str(i), n[-1], W[-1], b[-1]))
                h.append(activation('h_' + str(i), s[-1]))
                p.append(layers.pooling('p_' + str(i), h[-1]))
                n.append(normalizer('n_' + str(i), p[-1]))

            shape = n[-1].get_shape().as_list()
            n[-1] = tf.reshape(n[-1], [-1, shape[1] * shape[2] * shape[3]])
        else:

            units = [1000, 1000, 1000, 1000, 1000]

            # weights and biases
            W = []
            b = []

            # linear layers, activations and normalization
            s = []
            h = []
            n = [x]

            for i in range(args.layers):
                units_in = 784
                if i > 0:
                    units_in = units[i - 1]

                W.append(initializer('W_fc' + str(i), [units_in, units[i]]))
                b.append(
                    initializers.constant('b_fc' + str(i), [units[i]],
                                          value=args.bias))

                s.append(
                    layers.inner_product('s_' + str(i), n[-1], W[-1], b[-1]))
                h.append(activation('h_' + str(i), s[-1]))
                n.append(normalizer('n_' + str(i), h[-1]))

        W.append(initializer('W_fc3', [n[-1].get_shape().as_list()[1], 100]))
        b.append(initializers.constant('b_fc3', [100], value=args.bias))

        s.append(layers.inner_product('s_3', n[-1], W[-1], b[-1]))
        h.append(activation('h_3', s[-1]))
        n.append(normalizer('n_3', h[-1]))

        W.append(initializer('W_fc4', [100, 10]))
        b.append(initializers.constant('b_fc4', [10], value=args.bias))

        s.append(layers.inner_product('s_4', n[-1], W[-1], b[-1]))
        y_ = layers.softmax('y_', s[-1])

        # Loss definition and optimizer.
        cross_entropy = layers.cross_entropy('cross_entropy', y_, y)

        weights = [v for v in tf.all_variables() if v.name.startswith('W')]
        loss = cross_entropy + args.regularizer_weight * regularizer(
            'regularizer', weights)

        prediction = layers.prediction('prediction', y_)
        label = layers.label('label', y)
        accuracy = layers.accuracy('accuracy', prediction, label)
        optimizer = tf.train.AdamOptimizer(0.001).minimize(loss)

        tformer = transformer.Transformer()
        tformer.convert(np.float32)
        tformer.scale(1. / 255.)

        # Reshape input if necessary.
        if not args.cnn:
            tformer.reshape([None, 784])

        with tf.Session(config=tf.ConfigProto(
                log_device_placement=True)) as sess:
            sess.run(tf.initialize_all_variables())

            # First run to visualize variables and backpropagated gradients.
            batch = train_dataset.next_batch()
            res = sess.run([optimizer] + W + h,
                           feed_dict={
                               x: tformer.process(batch[0]),
                               y: batch[1]
                           })

            # Get all the weights for the plot.
            weights = {}
            for i in range(len(W)):
                weights['W_' + str(i)] = res[1 + i]

            plots.plot_normalized_histograms(
                weights, 'images/' + name + '_weights_t0.png')

            # Get all the activations for the plot.
            activations = {}
            for i in range(len(h)):
                activations['h_' + str(i)] = res[1 + len(W) + i]

            plots.plot_normalized_histograms(
                activations, 'images/' + name + '_activations_t0.png')

            losses = []
            batch_accuracies = []
            train_accuracies = []
            test_accuracies = []

            means = [[] for i in range(args.layers)]
            stds = [[] for i in range(args.layers)]

            iterations = args.epochs * (
                train_dataset.count // args.batch_size + 1)
            interval = iterations // 20
            for i in range(iterations):

                # If this is the last batch, we reshuffle after this step.
                was_last_batch = train_dataset.next_batch_is_last()

                batch = train_dataset.next_batch()
                res = sess.run([optimizer, cross_entropy, accuracy] + h,
                               feed_dict={
                                   x: tformer.process(batch[0]),
                                   y: batch[1]
                               })

                # Record the mean and standard deviation of each layer's activations.
                for j in range(args.layers):
                    means[j].append(np.mean(res[3 + j]))
                    stds[j].append(np.std(res[3 + j]))

                losses.append(res[1])
                batch_accuracies.append(res[2])

                print('Loss [%d]: %f' % (i, res[1]))
                print('Batch accuracy [%d]: %f' % (i, res[2]))

                if was_last_batch:
                    train_dataset.shuffle()
                    train_dataset.reset()

                if i % interval == 0:

                    # Accuracy on training set.
                    train_accuracy = 0
                    batches = test_train_dataset.count // test_train_dataset.batch_size

                    for j in range(batches):
                        batch = test_train_dataset.next_batch()
                        train_accuracy += sess.run(accuracy,
                                                   feed_dict={
                                                       x: tformer.process(
                                                           batch[0]),
                                                       y: batch[1]
                                                   })

                    train_accuracy = train_accuracy / batches
                    train_accuracies.append(train_accuracy)

                    print('Train accuracy [%d]: %f' % (i, train_accuracy))
                    test_train_dataset.reset()

                    # Accuracy on testing set.
                    test_accuracy = 0
                    batches = test_dataset.count // test_dataset.batch_size

                    for j in range(batches):
                        batch = test_dataset.next_batch()
                        test_accuracy += sess.run(accuracy,
                                                  feed_dict={
                                                      x: tformer.process(
                                                          batch[0]),
                                                      y: batch[1]
                                                  })

                    test_accuracy = test_accuracy / batches
                    test_accuracies.append(test_accuracy)

                    print('Test accuracy [%d]: %f' % (i, test_accuracy))
                    test_dataset.reset()

                    # Plot loss for each iteration.
                    plots.plot_lines(
                        {'loss': (np.arange(0, i + 1), np.array(losses))},
                        'images/' + name + '_loss.png')

                    statistics = {}
                    for j in range(args.layers):
                        statistics['h_' + str(j) + '_mean'] = (np.arange(
                            0, i + 1), np.array(means[j]))
                        statistics['h_' + str(j) + '_std'] = (np.arange(
                            0, i + 1), np.array(stds[j]))

                    # Plot activations for each iteration.
                    plots.plot_lines(statistics,
                                     'images/' + name + '_activations.png')

                    # Plot batch accuracy for every iteration and train/test accuracy for every measurement interval.
                    plots.plot_lines(
                        {
                            'batch_accuracy':
                            (np.arange(0, i + 1), np.array(batch_accuracies)),
                            'train_accuracy': (np.arange(0, i + 1, interval),
                                               np.array(train_accuracies)),
                            'test_accuracy': (np.arange(0, i + 1, interval),
                                              np.array(test_accuracies)),
                        }, 'images/' + name + '_accuracy.png')

            sess.close()
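
The train/test accuracies above are averaged over fixed-size evaluation batches because, as the comment notes, the GPU cannot hold the full sets in memory. Below is a minimal sketch of that batched-average pattern; eval_batch_metric is a hypothetical stand-in for the sess.run(accuracy, feed_dict=...) call.

def batched_average(dataset_count, batch_size, eval_batch_metric):
    # Average a per-batch metric over equally sized batches; a trailing
    # partial batch is dropped, as in the code above.
    batches = dataset_count // batch_size
    total = 0.0
    for _ in range(batches):
        total += eval_batch_metric()
    return total / batches

# Example with a fake metric that always returns 0.9.
print(batched_average(10000, 1000, lambda: 0.9))  # 0.9
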
Example #6
def main():
    """
    Train a model with the given parameters.

    :return: number of iterations, interval used to measure accuracy, the mean of each
        weight tensor per iteration, the mean activation of each layer per iteration,
        and accuracies including batch, train and test accuracy
    :rtype: (int, int, {'W_0': numpy.ndarray, ...}, {'h_0': numpy.ndarray, ...},
        {'batch_accuracies': list, 'test_accuracies': list, 'train_accuracies': list})
    """

    # To be sure ...
    tf.reset_default_graph()

    test_images, test_labels = mnist.load('t10k-images.idx3-ubyte', 't10k-labels.idx1-ubyte')
    train_images, train_labels = mnist.load('train-images.idx3-ubyte', 'train-labels.idx1-ubyte')

    test_images = test_images.reshape(test_images.shape[0], test_images.shape[1], test_images.shape[2], 1)
    train_images = train_images.reshape(train_images.shape[0], train_images.shape[1], train_images.shape[2], 1)

    train_dataset = dataset.Dataset(train_images, train_labels, args.batch_size)

    # Used for evaluating on the training and testing sets, since the GPU cannot
    # hold the full training/testing sets in memory.
    test_train_dataset = dataset.Dataset(train_images, train_labels, 1000)
    test_dataset = dataset.Dataset(test_images, test_labels, 1000)

    with tf.device('/gpu:0'):
        tf.set_random_seed(int(time.time()*1000))

        if args.cnn:
            x = tf.placeholder(tf.float32, shape = [None, 28, 28, 1])
        else:
            x = tf.placeholder(tf.float32, shape = [None, 784])

        if args.normalizer_data:
            n_x = normalizer('n_x', x)
        else:
            n_x = x

        y = tf.placeholder(tf.float32, shape = [None, 10])

        # Convolutional layers if requested, otherwise fully connected layers.
        if args.cnn:

            kernel_size = [3, 3, 3, 3, 3, 3, 3]

            channels = [16]
            for i in range(1, args.layers):
                channels.append(max(channels[-1]*2, 64))

            # weights and biases
            W = []
            b = []

            # convolutions, activations, pooling and normalization
            s = []
            h = []
            p = []
            n = [n_x]

            for i in range(args.layers):

                channels_in = 1
                if i > 0:
                    channels_in = channels[i - 1]

                W.append(initializer('W_conv' + str(i), [kernel_size[i], kernel_size[i], channels_in, channels[i]]))
                b.append(initializers.constant('b_conv' + str(i), [channels[i]], value = args.bias))

                s.append(layers.convolutional('s_' + str(i), n[-1], W[-1], b[-1]))
                h.append(activation('h_' + str(i), s[-1]))
                p.append(layers.pooling('p_' + str(i), h[-1]))
                n.append(normalizer('n_' + str(i), p[-1]))

            shape = n[-1].get_shape().as_list()
            n[-1] = tf.reshape(n[-1], [-1, shape[1]*shape[2]*shape[3]])
        else:

            units = [1000, 1000, 1000, 1000, 1000]

            # weights and biases
            W = []
            b = []

            # linear layers, activations and normalization
            s = []
            h = []
            n = [n_x]

            for i in range(args.layers):
                units_in = 784
                if i > 0:
                    units_in = units[i - 1]

                W.append(initializer('W_fc' + str(i), [units_in, units[i]]))
                b.append(initializers.constant('b_fc' + str(i), [units[i]], value = args.bias))

                s.append(layers.inner_product('s_' + str(i), n[-1], W[-1], b[-1]))
                h.append(activation('h_' + str(i), s[-1]))
                n.append(normalizer('n_' + str(i), h[-1]))

        W.append(initializer('W_fc3', [n[-1].get_shape().as_list()[1], 100]))
        b.append(initializers.constant('b_fc3', [100], value = args.bias))

        s.append(layers.inner_product('s_3', n[-1], W[-1], b[-1]))
        h.append(activation('h_3', s[-1]))
        n.append(normalizer('n_3', h[-1]))

        W.append(initializer('W_fc4', [100, 10]))
        b.append(initializers.constant('b_fc4', [10], value = args.bias))

        s.append(layers.inner_product('s_4', n[-1], W[-1], b[-1]))
        y_ = layers.softmax('y_', s[-1])

        # Loss definition and optimizer.
        cross_entropy = layers.cross_entropy('cross_entropy', y_, y)

        weights = [v for v in tf.all_variables() if v.name.startswith('W')]
        loss = cross_entropy + args.regularizer_weight*regularizer('regularizer', weights)

        prediction = layers.prediction('prediction', y_)
        label = layers.label('label', y)
        accuracy = layers.accuracy('accuracy', prediction, label)
        optimizer = tf.train.AdamOptimizer(0.001).minimize(loss)

        tformer = transformer.Transformer()
        tformer.convert(np.float32)
        tformer.scale(1./255.)

        # Reshape input if necessary.
        if not args.cnn:
            tformer.reshape([None, 784])

        with tf.Session(config = tf.ConfigProto(log_device_placement = True)) as sess:
            sess.run(tf.initialize_all_variables())

            losses = []
            batch_accuracies = []
            train_accuracies = []
            test_accuracies = []

            n_W = args.layers + 2
            n_h = args.layers + 1

            weight_means = [[] for i in range(n_W)]
            activation_means = [[] for i in range(n_h)]

            iterations = args.epochs*(train_dataset.count//args.batch_size + 1)
            interval = iterations // 20
            for i in range(iterations):

                # If this is the last batch, we reshuffle after this step.
                was_last_batch = train_dataset.next_batch_is_last()

                batch = train_dataset.next_batch()
                res = sess.run([optimizer, cross_entropy, accuracy] + W + h,
                                feed_dict = {x: tformer.process(batch[0]), y: batch[1]})

                for j in range(n_W):
                    weight_means[j].append(np.mean(res[3 + j]))

                # Record the mean of each layer's activations.
                for j in range(n_h):
                    activation_means[j].append(np.mean(res[3 + n_W + j]))

                losses.append(res[1])
                batch_accuracies.append(res[2])

                print('Loss [%d]: %f' % (i, res[1]))
                print('Batch accuracy [%d]: %f' % (i, res[2]))

                if was_last_batch:
                    train_dataset.shuffle()
                    train_dataset.reset()

                if i % interval == 0:

                    # Accuracy on training set.
                    train_accuracy = 0
                    batches = test_train_dataset.count // test_train_dataset.batch_size

                    for j in range(batches):
                        batch = test_train_dataset.next_batch()
                        train_accuracy += sess.run(accuracy, feed_dict = {x: tformer.process(batch[0]), y: batch[1]})

                    train_accuracy = train_accuracy / batches
                    train_accuracies.append(train_accuracy)

                    print('Train accuracy [%d]: %f' % (i, train_accuracy))
                    test_train_dataset.reset()

                    # Accuracy on testing set.
                    test_accuracy = 0
                    batches = test_dataset.count // test_dataset.batch_size

                    for j in range(batches):
                        batch = test_dataset.next_batch()
                        test_accuracy += sess.run(accuracy, feed_dict = {x: tformer.process(batch[0]), y: batch[1]})

                    test_accuracy = test_accuracy / batches
                    test_accuracies.append(test_accuracy)

                    print('Test accuracy [%d]: %f' % (i, test_accuracy))
                    test_dataset.reset()

            sess.close()

            weights = {}
            for j in range(n_W):
                weights['W_' + str(j)] = np.array(weight_means[j])

            activations = {}
            for j in range(n_h):
                activations['h_' + str(j)] = np.array(activation_means[j])

            accuracies = {
                'batch_accuracies': batch_accuracies,
                'train_accuracies': train_accuracies,
                'test_accuracies': test_accuracies
            }

            return iterations, interval, weights, activations, accuracies
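
Unlike the previous example, this main() returns its statistics instead of plotting them. A hypothetical caller might consume the return value as sketched below; matplotlib is an assumption, the original plotting code is not shown.

import matplotlib.pyplot as plt
import numpy as np

iterations, interval, weights, activations, accuracies = main()

steps = np.arange(iterations)
plt.plot(steps, accuracies['batch_accuracies'], label='batch')

# Train/test accuracy was only sampled every `interval` iterations.
sampled = np.arange(0, iterations, interval)
plt.plot(sampled, accuracies['train_accuracies'], label='train')
plt.plot(sampled, accuracies['test_accuracies'], label='test')

plt.xlabel('iteration')
plt.ylabel('accuracy')
plt.legend()
plt.savefig('accuracies.png')
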