def test_batch_norm_exceptions(self):
        with self.assertRaises(ValueError):
            # Axis does not exist
            layers.Input(10) > layers.BatchNorm(axes=2)

        with self.assertRaises(ValueError):
            connection = layers.Relu() > layers.BatchNorm()
            connection.initialize()
Example #2
    def test_batch_norm_storage(self):
        x_train, x_test, y_train, y_test = simple_classification()

        batch_norm = layers.BatchNorm()
        gdnet = algorithms.MinibatchGradientDescent(
            [
                layers.Input(10),
                layers.Relu(5),
                batch_norm,
                layers.Sigmoid(1),
            ],
            batch_size=10,
        )
        gdnet.train(x_train, y_train)

        error_before_save = gdnet.prediction_error(x_test, y_test)
        mean_before_save = batch_norm.running_mean.get_value()
        inv_std_before_save = batch_norm.running_inv_std.get_value()

        with tempfile.NamedTemporaryFile() as temp:
            storage.save(gdnet, temp.name)
            storage.load(gdnet, temp.name)

            error_after_load = gdnet.prediction_error(x_test, y_test)
            mean_after_load = batch_norm.running_mean.get_value()
            inv_std_after_load = batch_norm.running_inv_std.get_value()

            self.assertAlmostEqual(error_before_save, error_after_load)
            np.testing.assert_array_almost_equal(mean_before_save,
                                                 mean_after_load)
            np.testing.assert_array_almost_equal(inv_std_before_save,
                                                 inv_std_after_load)
Example #3
    def test_batch_norm_storage(self):
        x_train, x_test, y_train, y_test = simple_classification()

        batch_norm = layers.BatchNorm()
        gdnet = algorithms.GradientDescent(
            [
                layers.Input(10),
                layers.Relu(5),
                batch_norm,
                layers.Sigmoid(1),
            ],
            batch_size=10,
            verbose=True,  # keep it as `True`
        )
        gdnet.train(x_train, y_train, epochs=5)

        error_before_save = gdnet.prediction_error(x_test, y_test)
        mean_before_save = self.eval(batch_norm.running_mean)
        variance_before_save = self.eval(batch_norm.running_inv_std)

        with tempfile.NamedTemporaryFile() as temp:
            storage.save(gdnet, temp.name)
            storage.load(gdnet, temp.name)

            error_after_load = gdnet.prediction_error(x_test, y_test)
            mean_after_load = self.eval(batch_norm.running_mean)
            variance_after_load = self.eval(batch_norm.running_inv_std)

            self.assertAlmostEqual(error_before_save, error_after_load)
            np.testing.assert_array_almost_equal(mean_before_save,
                                                 mean_after_load)

            np.testing.assert_array_almost_equal(variance_before_save,
                                                 variance_after_load)
Example #4
    def test_batch_norm_in_non_training_state(self):
        network = layers.join(
            layers.Input(10),
            layers.BatchNorm(),
        )
        input_value = tf.Variable(
            asfloat(np.random.random((30, 10))),
            name='input_value',
            dtype=tf.float32,
        )

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        self.assertEqual(len(update_ops), 0)

        output_value = network.output(input_value)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        self.assertEqual(len(update_ops), 0)

        network.output(input_value, training=True)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        self.assertEqual(len(update_ops), 2)

        # Without training, the running mean and std are supposed
        # to be equal to 0 and 1 respectively.
        output_value = self.eval(output_value)
        np.testing.assert_array_almost_equal(self.eval(input_value),
                                             output_value,
                                             decimal=4)
Example #5
 def test_conv_output_shape_when_input_unknown(self):
     block = layers.join(
         layers.Convolution((3, 3, 32)),
         layers.Relu(),
         layers.BatchNorm(),
     )
     self.assertShapesEqual(block.input_shape, None)
     self.assertShapesEqual(block.output_shape, (None, None, None, 32))
Example #6
 def test_batchnorm_unsuitable_axes_values(self):
     network = layers.join(
         layers.Input((10, 3)),
         layers.BatchNorm(axes=(0, 2, 3)),
     )
     message = ("Batch normalization cannot be applied over one of "
                "the axis, because input has only 3 dimensions")
     with self.assertRaisesRegexp(LayerConnectionError, message):
         network.create_variables()
Example #7
 def test_batchnorm_wrong_axes_values(self):
     network = layers.join(
         layers.Relu(),
         layers.BatchNorm(),
     )
     message = ("Cannot initialize variables for the batch normalization "
                "layer, because input shape is undefined")
     with self.assertRaisesRegexp(WeightInitializationError, message):
         network.create_variables()
Example #8
 def test_repeat_network(self):
     block = layers.join(
         layers.Convolution((3, 3, 32)),
         layers.Relu(),
         layers.BatchNorm(),
     )
     network = layers.repeat(block, n=5)
     self.assertEqual(len(network), 15)
     self.assertShapesEqual(network.output_shape, (None, None, None, 32))
Example #9
    def test_simple_batch_norm(self):
        connection = layers.Input(10) > layers.BatchNorm()

        input_value = theano.shared(value=np.random.random((30, 10)))
        output_value = connection.output(input_value).eval()

        self.assertTrue(stats.mstats.normaltest(output_value))
        self.assertAlmostEqual(output_value.mean(), 0, places=3)
        self.assertAlmostEqual(output_value.std(), 1, places=3)
Example #10
    def test_batch_norm_as_shared_variable(self):
        gamma = theano.shared(value=asfloat(np.ones(2)))
        beta = theano.shared(value=asfloat(2 * np.ones(2)))

        batch_norm = layers.BatchNorm(gamma=gamma, beta=beta)
        layers.Input(10) > batch_norm

        self.assertIs(gamma, batch_norm.gamma)
        self.assertIs(beta, batch_norm.beta)
Example #11
 def test_batchnorm_unknown_dimension(self):
     network = layers.join(
         layers.Input((10, 10, None)),
         layers.BatchNorm(),
     )
     message = ("Cannot create variables for batch normalization, because "
                "input has unknown dimension #3 \(0-based indices\). "
                "Input shape: \(\?, 10, 10, \?\)")
     with self.assertRaisesRegexp(WeightInitializationError, message):
         network.create_variables()
Example #12
    def test_batch_norm_between_layers(self):
        connection = layers.join(
            layers.Input(10),
            layers.Relu(40),
            layers.BatchNorm(),
            layers.Relu(1),
        )

        input_value = np.random.random((30, 10))
        output_value = connection.output(input_value).eval()

        self.assertEqual(output_value.shape, (30, 1))
Example #13
    def test_batch_norm_gamma_beta_params(self):
        default_beta = -3.14
        default_gamma = 4.3
        connection = layers.join(
            layers.Input(10),
            layers.BatchNorm(gamma=default_gamma, beta=default_beta))

        input_value = theano.shared(value=np.random.random((30, 10)))
        output_value = connection.output(input_value).eval()

        self.assertAlmostEqual(output_value.mean(), default_beta, places=3)
        self.assertAlmostEqual(output_value.std(), default_gamma, places=3)
Example #14
    def test_simple_batch_norm(self):
        connection = layers.Input(10) > layers.BatchNorm()

        input_value = tf.Variable(
            asfloat(np.random.random((30, 10))),
            name='input_value',
            dtype=tf.float32,
        )
        output_value = self.eval(connection.output(input_value))

        self.assertTrue(stats.mstats.normaltest(output_value))
        self.assertAlmostEqual(output_value.mean(), 0, places=3)
        self.assertAlmostEqual(output_value.std(), 1, places=3)
Example #15
    def test_batch_norm_between_layers(self):
        connection = surgery.sew_together([
            layers.Input(10),
            layers.Relu(40),
            layers.BatchNorm(),
            layers.Relu(1),
        ])
        connection.initialize()

        input_value = np.random.random((30, 10))
        output_value = connection.output(input_value).eval()

        self.assertEqual(output_value.shape, (30, 1))
Example #16
def ResidualUnit(n_in_filters, n_out_filters, stride, has_branch=False):
    main_branch = layers.join(
        layers.Convolution((n_in_filters, 1, 1), stride=stride, bias=None),
        layers.BatchNorm(),
        layers.Relu(),
        layers.Convolution((n_in_filters, 3, 3), padding=1, bias=None),
        layers.BatchNorm(),
        layers.Relu(),
        layers.Convolution((n_out_filters, 1, 1), bias=None),
        layers.BatchNorm(),
    )

    residual_branch = []
    if has_branch:
        residual_branch = layers.join(
            layers.Convolution((n_out_filters, 1, 1), stride=stride,
                               bias=None),
            layers.BatchNorm(),
        )

    return layers.join(
        [main_branch, residual_branch],
        layers.Elementwise() > layers.Relu(),
    )
Example #17
    def test_batch_norm_between_layers(self):
        network = layers.join(
            layers.Input(10),
            layers.Relu(40),
            layers.BatchNorm(),
            layers.Relu(1),
        )

        input_value = tf.Variable(
            asfloat(np.random.random((30, 10))),
            name='input_value',
            dtype=tf.float32,
        )
        output_value = self.eval(network.output(input_value, training=True))
        self.assertEqual(output_value.shape, (30, 1))
Example #18
    def test_batch_norm_gamma_beta_params(self):
        default_beta = -3.14
        default_gamma = 4.3
        connection = layers.join(
            layers.Input(10),
            layers.BatchNorm(gamma=default_gamma, beta=default_beta))

        input_value = tf.Variable(
            asfloat(np.random.random((30, 10))),
            name='input_value',
            dtype=tf.float32,
        )
        output_value = self.eval(connection.output(input_value))

        self.assertAlmostEqual(output_value.mean(), default_beta, places=3)
        self.assertAlmostEqual(output_value.std(), default_gamma, places=3)
Example #19
    def test_batch_norm_in_non_training_state(self):
        batch_norm = layers.BatchNorm()
        layers.Input(10) > batch_norm

        input_value = theano.shared(value=np.random.random((30, 10)))

        self.assertEqual(len(batch_norm.updates), 0)

        batch_norm.output(input_value)
        self.assertEqual(len(batch_norm.updates), 2)

        with batch_norm.disable_training_state():
            # Without training, the running mean and std are supposed
            # to be equal to 0 and 1 respectively.
            output_value = batch_norm.output(input_value).eval()
            np.testing.assert_array_almost_equal(input_value.get_value(),
                                                 output_value)
Example #20
    def test_batch_norm_as_shared_variable(self):
        gamma = tf.Variable(
            asfloat(np.ones(2)),
            name='gamma',
            dtype=tf.float32,
        )
        beta = tf.Variable(
            asfloat(2 * np.ones(2)),
            name='beta',
            dtype=tf.float32,
        )

        batch_norm = layers.BatchNorm(gamma=gamma, beta=beta)
        layers.Input(10) > batch_norm

        self.assertIs(gamma, batch_norm.gamma)
        self.assertIs(beta, batch_norm.beta)
Example #21
    def test_batch_norm_as_shared_variable(self):
        gamma = tf.Variable(
            asfloat(np.ones((1, 2))),
            name='gamma',
            dtype=tf.float32,
        )
        beta = tf.Variable(
            asfloat(2 * np.ones((1, 2))),
            name='beta',
            dtype=tf.float32,
        )

        batch_norm = layers.BatchNorm(gamma=gamma, beta=beta)
        network = layers.join(layers.Input(2), batch_norm)
        network.outputs

        self.assertIs(gamma, batch_norm.gamma)
        self.assertIs(beta, batch_norm.beta)
Example #22
    def test_batch_norm_in_non_training_state(self):
        batch_norm = layers.BatchNorm()
        layers.Input(10) > batch_norm

        input_value = tf.Variable(
            asfloat(np.random.random((30, 10))),
            name='input_value',
            dtype=tf.float32,
        )

        self.assertEqual(len(batch_norm.updates), 0)

        batch_norm.output(input_value)
        self.assertEqual(len(batch_norm.updates), 2)

        with batch_norm.disable_training_state():
            # Without training, the running mean and std are supposed
            # to be equal to 0 and 1 respectively.
            output_value = self.eval(batch_norm.output(input_value))
            np.testing.assert_array_almost_equal(self.eval(input_value),
                                                 output_value,
                                                 decimal=4)
Example #23
    def test_storage_save_dict(self):
        network = layers.join(
            layers.parallel([
                layers.Input(2, name='input-1'),
                layers.PRelu(1, name='prelu')
            ], [
                layers.Input(1, name='input-2'),
                layers.Sigmoid(4, name='sigmoid'),
                layers.BatchNorm(name='batch-norm'),
            ]),
            layers.Concatenate(name='concatenate'),
            layers.Softmax(3, name='softmax'),
        )
        dict_network = storage.save_dict(network)

        expected_keys = ('metadata', 'layers', 'graph')
        self.assertItemsEqual(expected_keys, dict_network.keys())

        expected_metadata_keys = ('created', 'language', 'library', 'version')
        actual_metadata_keys = dict_network['metadata'].keys()
        self.assertItemsEqual(expected_metadata_keys, actual_metadata_keys)

        self.assertEqual(len(dict_network['layers']), 7)

        expected_layers = [{
            'class_name': 'Input',
            'configs': {
                'name': 'input-1',
                'shape': (2, )
            },
            'name': 'input-1',
        }, {
            'class_name': 'PRelu',
            'configs': {
                'alpha_axes': (-1, ),
                'name': 'prelu',
                'n_units': 1
            },
            'name': 'prelu',
        }, {
            'class_name': 'Input',
            'configs': {
                'name': 'input-2',
                'shape': (1, )
            },
            'name': 'input-2',
        }, {
            'class_name': 'Sigmoid',
            'configs': {
                'name': 'sigmoid',
                'n_units': 4
            },
            'name': 'sigmoid',
        }, {
            'class_name': 'BatchNorm',
            'configs': {
                'alpha': 0.1,
                'axes': (0, ),
                'epsilon': 1e-05,
                'name': 'batch-norm'
            },
            'name': 'batch-norm',
        }, {
            'class_name': 'Concatenate',
            'configs': {
                'axis': -1,
                'name': 'concatenate'
            },
            'name': 'concatenate',
        }, {
            'class_name': 'Softmax',
            'configs': {
                'name': 'softmax',
                'n_units': 3
            },
            'name': 'softmax',
        }]
        actual_layers = []
        for i, layer in enumerate(dict_network['layers']):
            self.assertIn('parameters', layer, msg="Layer #" + str(i))

            layer = copy.deepcopy(layer)
            del layer['parameters']
            actual_layers.append(layer)

        self.assertEqual(actual_layers, expected_layers)
Example #24
mean = x_train.mean(axis=(0, 2, 3)).reshape((1, -1, 1, 1))
std = x_train.std(axis=(0, 2, 3)).reshape((1, -1, 1, 1))

x_train -= mean
x_train /= std
x_test -= mean
x_test /= std

target_scaler = OneHotEncoder()
y_train = target_scaler.fit_transform(y_train.reshape((-1, 1))).todense()
y_test = target_scaler.transform(y_test.reshape((-1, 1))).todense()

network = algorithms.Adadelta(
    [
        layers.Input((3, 32, 32)),
        layers.Convolution((64, 3, 3)) > layers.BatchNorm() > layers.PRelu(),
        layers.Convolution((64, 3, 3)) > layers.BatchNorm() > layers.PRelu(),
        layers.MaxPooling((2, 2)),
        layers.Convolution((128, 3, 3)) > layers.BatchNorm() > layers.PRelu(),
        layers.Convolution((128, 3, 3)) > layers.BatchNorm() > layers.PRelu(),
        layers.MaxPooling((2, 2)),
        layers.Reshape(),
        layers.Linear(1024) > layers.BatchNorm() > layers.PRelu(),
        layers.Linear(1024) > layers.BatchNorm() > layers.PRelu(),
        layers.Softmax(10),
    ],
    error='categorical_crossentropy',
    step=0.25,
    shuffle_data=True,
    batch_size=128,
    verbose=True,
Example #25
    mean = x_train.mean(axis=(0, 2, 3))
    std = x_train.std(axis=(0, 2, 3))

    x_train -= mean
    x_train /= std
    x_test -= mean
    x_test /= std

    return x_train, x_test, y_train, y_test


network = algorithms.Adadelta(
    [
        layers.Input((1, 28, 28)),
        layers.Convolution((32, 3, 3)) > layers.BatchNorm() > layers.Relu(),
        layers.Convolution((48, 3, 3)) > layers.BatchNorm() > layers.Relu(),
        layers.MaxPooling((2, 2)),
        layers.Convolution((64, 3, 3)) > layers.BatchNorm() > layers.Relu(),
        layers.MaxPooling((2, 2)),
        layers.Reshape(),
        layers.Linear(1024) > layers.BatchNorm() > layers.Relu(),
        layers.Softmax(10),
    ],

    # Using categorical cross-entropy as a loss function
    error='categorical_crossentropy',

    # Mini-batch size
    batch_size=128,
Example #26
        residual_branch = layers.join(
            layers.Convolution((n_out_filters, 1, 1), stride=stride,
                               bias=None),
            layers.BatchNorm(),
        )

    return layers.join(
        [main_branch, residual_branch],
        layers.Elementwise() > layers.Relu(),
    )


resnet50 = layers.join(
    layers.Input((3, 224, 224)),
    layers.Convolution((64, 7, 7), stride=2, padding=3),
    layers.BatchNorm(),
    layers.Relu(),
    layers.MaxPooling((3, 3), stride=(2, 2), ignore_border=False),
    ResidualUnit(64, 256, stride=1, has_branch=True),
    ResidualUnit(64, 256, stride=1),
    ResidualUnit(64, 256, stride=1),
    ResidualUnit(128, 512, stride=2, has_branch=True),
    ResidualUnit(128, 512, stride=1),
    ResidualUnit(128, 512, stride=1),
    ResidualUnit(128, 512, stride=1),
    ResidualUnit(256, 1024, stride=2, has_branch=True),
    ResidualUnit(256, 1024, stride=1),
    ResidualUnit(256, 1024, stride=1),
    ResidualUnit(256, 1024, stride=1),
    ResidualUnit(256, 1024, stride=1),
    ResidualUnit(256, 1024, stride=1),
Example #27
def resnet50(input_shape=(224, 224, 3), include_global_pool=True,
             in_out_ratio=32):
    """
    ResNet50 network architecture with random parameters. Parameters
    can be loaded using ``neupy.storage`` module.

    ResNet50 has roughly 25.5 million parameters.

    Parameters
    ----------
    input_shape : tuple
        Network's input shape. Defaults to ``(224, 224, 3)``.

    include_global_pool : bool
        Specifies whether the returned network should include the global
        pooling layer. Defaults to ``True``.

    in_out_ratio : {4, 8, 16, 32}
        Every layer that applies strides reduces the height and width of the
        image. There are 5 such layers in ResNet, so by the end each spatial
        dimension gets reduced by a factor of ``32``. For example, a 224x224
        image will be reduced to 7x7 feature maps. This parameter specifies
        the level of reduction we want to obtain after the input has been
        propagated through all the convolution layers.

    Notes
    -----
    Because of the global pooling layer, ResNet50 can be applied to
    images of variable size. The only limitation is that the image
    size should be bigger than 32x32; otherwise, the network won't be
    able to apply all of its transformations to the image.

    Examples
    --------
    ResNet-50 for ImageNet classification

    >>> from neupy import architectures, algorithms
    >>>
    >>> resnet = architectures.resnet50()
    >>> resnet
    (?, 224, 224, 3) -> [... 187 layers ...] -> (?, 1000)
    >>>
    >>> optimizer = algorithms.Momentum(resnet)

    ResNet-50 for custom classification task

    >>> from neupy import architectures
    >>> resnet = architectures.resnet50(include_global_pool=False)
    >>> resnet
    (?, 224, 224, 3) -> [... 185 layers ...] -> (?, 7, 7, 2048)
    >>>
    >>> from neupy.layers import *
    >>> resnet = resnet >> GlobalPooling('avg') >> Softmax(21)
    >>> resnet
    (?, 224, 224, 3) -> [... 187 layers ...] -> (?, 21)

    ResNet-50 for image segmentation

    >>> from neupy import architectures
    >>> resnet = architectures.resnet50(
    ...     include_global_pool=False,
    ...     in_out_ratio=8,
    ... )
    >>> resnet
    (?, 224, 224, 3) -> [... 185 layers ...] -> (?, 28, 28, 2048)

    See Also
    --------
    :architecture:`vgg16` : VGG16 network
    :architecture:`squeezenet` : SqueezeNet network
    :architecture:`resnet50` : ResNet-50 network

    References
    ----------
    Deep Residual Learning for Image Recognition.
    https://arxiv.org/abs/1512.03385
    """
    in_out_configs = {
        4: {'strides': [1, 1, 1], 'rates': [2, 4, 8]},
        8: {'strides': [2, 1, 1], 'rates': [1, 2, 4]},
        16: {'strides': [2, 2, 1], 'rates': [1, 1, 2]},
        32: {'strides': [2, 2, 2], 'rates': [1, 1, 1]},
    }

    if in_out_ratio not in in_out_configs:
        raise ValueError(
            "Expected one of the folowing in_out_ratio values: {}, got "
            "{} instead.".format(in_out_configs.keys(), in_out_ratio))

    strides = in_out_configs[in_out_ratio]['strides']
    rates = in_out_configs[in_out_ratio]['rates']

    resnet = layers.join(
        layers.Input(input_shape),

        # Convolutional layer reduces the image's height and width by a
        # factor of 2 (because of the stride),
        # from (224, 224, 3) to (112, 112, 64)
        layers.Convolution(
            (7, 7, 64), stride=2, bias=None,
            padding='same', name='conv1'
        ),
        layers.BatchNorm(name='bn_conv1'),
        layers.Relu(),

        # Stride equal to 2 reduces the image size by a factor of two,
        # from (112, 112, 64) to (56, 56, 64)
        layers.MaxPooling((3, 3), stride=2, padding="same"),

        # The branch option applies extra convolution and batch
        # normalization transformations to the residual
        ResidualUnit(64, name='2a', has_branch=True),
        ResidualUnit(64, name='2b'),
        ResidualUnit(64, name='2c'),

        # When stride=2, width and height are reduced by a factor of 2
        ResidualUnit(128, stride=strides[0], name='3a', has_branch=True),
        ResidualUnit(128, rate=rates[0], name='3b'),
        ResidualUnit(128, rate=rates[0], name='3c'),
        ResidualUnit(128, rate=rates[0], name='3d'),

        # When stride=2, width and height are reduced by a factor of 2
        ResidualUnit(256, rate=rates[0], name='4a',
                     stride=strides[1], has_branch=True),
        ResidualUnit(256, rate=rates[1], name='4b'),
        ResidualUnit(256, rate=rates[1], name='4c'),
        ResidualUnit(256, rate=rates[1], name='4d'),
        ResidualUnit(256, rate=rates[1], name='4e'),
        ResidualUnit(256, rate=rates[1], name='4f'),

        # When stride=2, width and height are reduced by a factor of 2
        ResidualUnit(512, rate=rates[1], name='5a',
                     stride=strides[2], has_branch=True),
        ResidualUnit(512, rate=rates[2], name='5b'),
        ResidualUnit(512, rate=rates[2], name='5c'),
    )

    if include_global_pool:
        resnet = layers.join(
            resnet,
            # Since the final residual unit has 2048 output filters, global
            # pooling will replace every output feature map with a single
            # average value. Regardless of the input image size, the output
            # from this layer will always be a vector with 2048 values.
            layers.GlobalPooling('avg'),
            layers.Softmax(1000, name='fc1000'),
        )

    return resnet
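
The docstring above mentions that parameters can be loaded with the ``neupy.storage`` module. A minimal sketch of that workflow, assuming a locally available file with pretrained parameters (the file name below is hypothetical):

from neupy import architectures, storage

# Build the architecture with randomly initialized parameters
resnet = architectures.resnet50()

# Overwrite them with previously saved parameters; storage.save and
# storage.load follow the same pattern as in the storage tests above
storage.load(resnet, 'resnet50.hdf5')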
Example #28
def ResidualUnit(n_input_filters, stride=1, rate=1, has_branch=False,
                 name=None):

    def bn_name(index):
        return 'bn' + name + '_branch' + index

    def conv_name(index):
        return 'res' + name + '_branch' + index

    n_output_filters = 4 * n_input_filters
    main_branch = layers.join(
        # The main purpose of this 1x1 convolution layer is to
        # reduce the number of filters. For instance, a tensor with
        # 256 filters can be reduced to 64. This trick reduces the
        # computation by a factor of 4.
        layers.Convolution(
            size=(1, 1, n_input_filters),
            stride=stride,
            bias=None,
            name=conv_name('2a'),
        ),
        layers.BatchNorm(name=bn_name('2a')),
        layers.Relu(),

        # This convolution layer applies a 3x3 filter in order to
        # extract features.
        layers.Convolution(
            (3, 3, n_input_filters),
            padding='same',
            dilation=rate,
            bias=None,
            name=conv_name('2b'),
        ),
        layers.BatchNorm(name=bn_name('2b')),
        layers.Relu(),

        # The last layer reverses the operation of the first one: here
        # we increase the number of filters. For instance, the previously
        # obtained 64 filters can be increased back to 256 filters.
        layers.Convolution(
            (1, 1, n_output_filters),
            bias=None,
            name=conv_name('2c')
        ),
        layers.BatchNorm(name=bn_name('2c')),
    )

    if has_branch:
        residual_branch = layers.join(
            layers.Convolution(
                (1, 1, n_output_filters),
                stride=stride,
                bias=None,
                name=conv_name('1'),
            ),
            layers.BatchNorm(name=bn_name('1')),
        )
    else:
        # The identity layer defines the residual connection, meaning
        # that the output from this branch is equal to its input
        residual_branch = layers.Identity('residual-' + name)

    return layers.join(
        # The outputs from the two branches are combined with a simple
        # elementwise sum operation. The main purpose of the residual
        # connection is to build shortcuts for the gradient during
        # backpropagation.
        (main_branch | residual_branch),
        layers.Elementwise(),
        layers.Relu(),
    )
Example #29
def ConvReluBN(*conv_args, **conv_kwargs):
    return layers.join(
        layers.Convolution(*conv_args, **conv_kwargs),
        layers.Relu(),
        layers.BatchNorm(epsilon=0.001),
    )
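
ConvReluBN above is a small reusable block. A minimal usage sketch, assuming the channels-last (height, width, filters) convolution size convention used in the other examples on this page; the input shape and filter counts are illustrative only:

from neupy import layers

network = layers.join(
    layers.Input((28, 28, 1)),
    ConvReluBN((3, 3, 32)),   # convolution + relu + batch norm
    ConvReluBN((3, 3, 64)),
    layers.MaxPooling((2, 2)),
)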
Example #30
 def test_batchnorm_wrong_axes(self):
     message = "Specified axes have to contain only unique values"
     with self.assertRaisesRegexp(ValueError, message):
         layers.BatchNorm(axes=(0, 1, 1))