Пример #1
0
  def testConv2DRank1BatchEnsemble(self, alpha_initializer, gamma_initializer):
    """Compares Conv2DRank1 with a manual, per-member computation.

    One batch of examples is tiled once per ensemble member and passed through
    the layer. The reference result applies `layer.conv2d` to the inputs
    multiplied by each member's alpha, multiplies by that member's gamma, adds
    that member's bias, and concatenates the members along the batch axis.

    Args:
      alpha_initializer: Initializer passed to the layer for alpha.
      gamma_initializer: Initializer passed to the layer for gamma.
    """
    tf.keras.backend.set_learning_phase(1)  # training time
    num_members = 3
    batch_per_member = 4
    in_channels = 5
    out_channels = 5
    member_inputs = tf.random.normal([batch_per_member, 4, 4, in_channels])
    tiled_inputs = tf.tile(member_inputs, [num_members, 1, 1, 1])
    layer = rank1_bnn_layers.Conv2DRank1(
        out_channels,
        kernel_size=2,
        padding='same',
        alpha_initializer=alpha_initializer,
        gamma_initializer=gamma_initializer,
        alpha_regularizer=None,
        gamma_regularizer=None,
        activation=None,
        ensemble_size=num_members)

    actual = layer(tiled_inputs)

    # Rebuild the expected output one ensemble member at a time.
    expected_parts = []
    for member in range(num_members):
      scaled_inputs = member_inputs * layer.alpha[member]
      expected_parts.append(
          layer.conv2d(scaled_inputs) * layer.gamma[member]
          + layer.bias[member])
    expected = tf.concat(expected_parts, axis=0)

    expected_shape = (num_members * batch_per_member, 4, 4, out_channels)
    self.assertEqual(actual.shape, expected_shape)
    self.assertAllClose(actual, expected)
Пример #2
0
def rank1_resnet_layer(inputs, filters, kernel_size, strides, activation,
                       alpha_initializer, gamma_initializer, alpha_regularizer,
                       gamma_regularizer, use_additive_perturbation,
                       ensemble_size, random_sign_init, dropout_rate):
    """Builds a rank-1 Conv2D -> BatchNormalization -> (activation) stack.

    The convolution uses 'same' padding, no bias and a 'he_normal' kernel
    initializer. The activation layer is only added when `activation` is not
    None.

    Args:
      inputs: tf.Tensor.
      filters: Number of filters for Conv2D.
      kernel_size: Kernel dimensions for Conv2D.
      strides: Stride dimensions for Conv2D.
      activation: tf.keras.activations.Activation, or None to skip it.
      alpha_initializer: The initializer for the alpha parameters.
      gamma_initializer: The initializer for the gamma parameters.
      alpha_regularizer: The regularizer for the alpha parameters.
      gamma_regularizer: The regularizer for the gamma parameters.
      use_additive_perturbation: Whether or not to use additive perturbations
        instead of multiplicative perturbations.
      ensemble_size: Number of ensemble members.
      random_sign_init: Value used to initialize trainable deterministic
        initializers, as applicable. Values greater than zero result in
        initialization to a random sign vector, where random_sign_init is the
        probability of a 1 value. Values less than zero result in
        initialization from a Gaussian with mean 1 and standard deviation
        equal to -random_sign_init.
      dropout_rate: Dropout rate, forwarded to `utils.make_initializer`.

    Returns:
      tf.Tensor.
    """
    alpha_init = utils.make_initializer(
        alpha_initializer, random_sign_init, dropout_rate)
    gamma_init = utils.make_initializer(
        gamma_initializer, random_sign_init, dropout_rate)

    conv = rank1_bnn_layers.Conv2DRank1(
        filters,
        kernel_size=kernel_size,
        strides=strides,
        padding='same',
        use_bias=False,
        alpha_initializer=alpha_init,
        gamma_initializer=gamma_init,
        kernel_initializer='he_normal',
        alpha_regularizer=alpha_regularizer,
        gamma_regularizer=gamma_regularizer,
        use_additive_perturbation=use_additive_perturbation,
        ensemble_size=ensemble_size)
    features = conv(inputs)

    batch_norm = tf.keras.layers.BatchNormalization(
        epsilon=BATCH_NORM_EPSILON, momentum=BATCH_NORM_DECAY)
    features = batch_norm(features)

    if activation is None:
        return features
    return tf.keras.layers.Activation(activation)(features)
Пример #3
0
 def testConv2DRank1Model(self):
   """Runs Conv2DRank1 inside a Sequential model and checks shape and losses.

   The model is Conv2DRank1 -> Flatten -> Dense(2). It is called in training
   mode on a random (3, 4, 4, 1) float32 batch; the output must have shape
   (3, 2) and the model must report exactly two losses.
   """
   batch = np.random.rand(3, 4, 4, 1).astype(np.float32)

   rank1_conv = rank1_bnn_layers.Conv2DRank1(
       3, kernel_size=2, padding='SAME', activation=tf.nn.relu)
   flatten = tf.keras.layers.Flatten()
   head = tf.keras.layers.Dense(2, activation=None)
   model = tf.keras.Sequential([rank1_conv, flatten, head])

   predictions = model(batch, training=True)

   self.assertEqual(predictions.shape, (3, 2))
   self.assertLen(model.losses, 2)
Пример #4
0
 def testConv2DRank1AlphaGamma(self, alpha_initializer, gamma_initializer,
                               all_close, use_additive_perturbation,
                               ensemble_size):
     """Checks whether two forward passes of Conv2DRank1 agree.

     The layer is called twice on the same random input in training mode. The
     output shape is checked against (5 * ensemble_size, 3, 3, 4), the two
     outputs are compared according to `all_close`, and `get_config()` is
     called to make sure it does not raise.

     Args:
       alpha_initializer: Initializer passed to the layer for alpha.
       gamma_initializer: Initializer passed to the layer for gamma.
       all_close: If True the two outputs must be close; otherwise they must
         not be close.
       use_additive_perturbation: Passed to the layer; selects additive
         instead of multiplicative perturbations.
       ensemble_size: Number of ensemble members; passed to the layer and
         used to size the input batch (5 examples per member).
     """
     tf.keras.backend.set_learning_phase(1)  # training time
     inputs = np.random.rand(5 * ensemble_size, 4, 4, 12).astype(np.float32)
     model = rank1_bnn_layers.Conv2DRank1(
         4,
         kernel_size=2,
         alpha_initializer=alpha_initializer,
         gamma_initializer=gamma_initializer,
         # Forward the parameterized settings; without these every case would
         # only ever exercise the layer's default configuration.
         use_additive_perturbation=use_additive_perturbation,
         ensemble_size=ensemble_size,
         activation=None)
     outputs1 = model(inputs)
     outputs2 = model(inputs)
     self.assertEqual(outputs1.shape, (5 * ensemble_size, 3, 3, 4))
     if all_close:
         self.assertAllClose(outputs1, outputs2)
     else:
         self.assertNotAllClose(outputs1, outputs2)
     # Smoke check: serializing the layer configuration must not raise.
     model.get_config()
Пример #5
0
def bottleneck_block(inputs, filters, stage, block, strides, alpha_initializer,
                     gamma_initializer, alpha_regularizer, gamma_regularizer,
                     use_additive_perturbation, ensemble_size,
                     random_sign_init, dropout_rate, prior_stddev, use_tpu):
    """Residual block with 1x1 -> 3x3 -> 1x1 convs in main path.

    Note that strides appear in the second conv (3x3) rather than the first
    (1x1). This is also known as "ResNet v1.5" as it differs from He et al.
    (2015) (http://torch.ch/blog/2016/02/04/resnets.html).

    Every convolution is a bias-free `Conv2DRank1` followed by
    `ed.layers.ensemble_batchnorm`. When the main-path output shape is not
    compatible with the shape of `inputs`, the shortcut is projected with a
    strided 1x1 conv + batch norm before the residual addition.

    Args:
      inputs: tf.Tensor.
      filters: list of integers, the filters of 3 conv layer at main path
      stage: integer, current stage label, used for generating layer names
      block: 'a','b'..., current block label, used for generating layer names
      strides: Strides for the second conv layer in the block (also used by
        the projection shortcut).
      alpha_initializer: The initializer for the alpha parameters.
      gamma_initializer: The initializer for the gamma parameters.
      alpha_regularizer: The regularizer for the alpha parameters.
      gamma_regularizer: The regularizer for the gamma parameters.
      use_additive_perturbation: Whether or not to use additive perturbations
        instead of multiplicative perturbations.
      ensemble_size: Number of ensemble members.
      random_sign_init: Value used to initialize trainable deterministic
        initializers, as applicable. Values greater than zero result in
        initialization to a random sign vector, where random_sign_init is the
        probability of a 1 value. Values less than zero result in
        initialization from a Gaussian with mean 1 and standard deviation
        equal to -random_sign_init.
      dropout_rate: Dropout rate.
      prior_stddev: Standard deviation of the prior.
      use_tpu: whether the model runs on TPU.

    Returns:
      tf.Tensor.
    """
    filters1, filters2, filters3 = filters
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    def conv_bn(tensor, num_filters, suffix, **conv_kwargs):
        """Applies one rank-1 conv named `suffix`, then ensemble batch norm.

        `conv_kwargs` carries the per-layer settings (kernel_size and, where
        needed, strides/padding). Fresh initializer and regularizer objects
        are created on every call so that no two layers share them.
        """
        tensor = rank1_bnn_layers.Conv2DRank1(
            num_filters,
            use_bias=False,
            alpha_initializer=utils.make_initializer(alpha_initializer,
                                                     random_sign_init,
                                                     dropout_rate),
            gamma_initializer=utils.make_initializer(gamma_initializer,
                                                     random_sign_init,
                                                     dropout_rate),
            kernel_initializer='he_normal',
            alpha_regularizer=utils.make_regularizer(alpha_regularizer, 1.,
                                                     prior_stddev),
            gamma_regularizer=utils.make_regularizer(gamma_regularizer, 1.,
                                                     prior_stddev),
            use_additive_perturbation=use_additive_perturbation,
            name=conv_name_base + suffix,
            ensemble_size=ensemble_size,
            **conv_kwargs)(tensor)
        return ed.layers.ensemble_batchnorm(tensor,
                                            ensemble_size=ensemble_size,
                                            use_tpu=use_tpu,
                                            momentum=BATCH_NORM_DECAY,
                                            epsilon=BATCH_NORM_EPSILON,
                                            name=bn_name_base + suffix)

    # Main path: 1x1 -> 3x3 (strided) -> 1x1; no ReLU after the last conv
    # because the activation is applied after the residual addition.
    x = conv_bn(inputs, filters1, '2a', kernel_size=1)
    x = tf.keras.layers.Activation('relu')(x)

    x = conv_bn(x, filters2, '2b', kernel_size=3, strides=strides,
                padding='same')
    x = tf.keras.layers.Activation('relu')(x)

    x = conv_bn(x, filters3, '2c', kernel_size=1)

    # Identity shortcut unless the shapes disagree, in which case project.
    shortcut = inputs
    if not x.shape.is_compatible_with(shortcut.shape):
        shortcut = conv_bn(inputs, filters3, '1', kernel_size=1,
                           strides=strides)

    x = tf.keras.layers.add([x, shortcut])
    x = tf.keras.layers.Activation('relu')(x)
    return x
Пример #6
0
def rank1_resnet50(input_shape, num_classes, alpha_initializer,
                   gamma_initializer, alpha_regularizer, gamma_regularizer,
                   use_additive_perturbation, ensemble_size, random_sign_init,
                   dropout_rate, prior_stddev, use_tpu):
    """Builds ResNet50 with rank 1 priors.

    Using strided conv, pooling, four groups of residual blocks, and pooling,
    the network maps spatial features of size 224x224 -> 112x112 -> 56x56 ->
    28x28 -> 14x14 -> 7x7 (Table 1 of He et al. (2015)).

    Args:
      input_shape: Shape tuple of input excluding batch dimension.
      num_classes: Number of output classes.
      alpha_initializer: The initializer for the alpha parameters.
      gamma_initializer: The initializer for the gamma parameters.
      alpha_regularizer: The regularizer for the alpha parameters.
      gamma_regularizer: The regularizer for the gamma parameters.
      use_additive_perturbation: Whether or not to use additive perturbations
        instead of multiplicative perturbations.
      ensemble_size: Number of ensemble members.
      random_sign_init: Value used to initialize trainable deterministic
        initializers, as applicable. Values greater than zero result in
        initialization to a random sign vector, where random_sign_init is the
        probability of a 1 value. Values less than zero result in
        initialization from a Gaussian with mean 1 and standard deviation
        equal to -random_sign_init.
      dropout_rate: Dropout rate.
      prior_stddev: Standard deviation of the prior.
      use_tpu: whether the model runs on TPU.

    Returns:
      tf.keras.Model.
    """

    def rank1_kwargs():
        """Returns fresh rank-1 keyword arguments for one layer."""
        return dict(
            alpha_initializer=utils.make_initializer(
                alpha_initializer, random_sign_init, dropout_rate),
            gamma_initializer=utils.make_initializer(
                gamma_initializer, random_sign_init, dropout_rate),
            alpha_regularizer=utils.make_regularizer(
                alpha_regularizer, 1., prior_stddev),
            gamma_regularizer=utils.make_regularizer(
                gamma_regularizer, 1., prior_stddev),
            use_additive_perturbation=use_additive_perturbation,
            ensemble_size=ensemble_size)

    # Residual-group builder with all rank-1 settings bound once.
    build_group = functools.partial(
        group,
        alpha_initializer=alpha_initializer,
        gamma_initializer=gamma_initializer,
        alpha_regularizer=alpha_regularizer,
        gamma_regularizer=gamma_regularizer,
        use_additive_perturbation=use_additive_perturbation,
        ensemble_size=ensemble_size,
        random_sign_init=random_sign_init,
        dropout_rate=dropout_rate,
        prior_stddev=prior_stddev,
        use_tpu=use_tpu)

    inputs = tf.keras.layers.Input(shape=input_shape)

    # Stem: explicit zero padding, 7x7 stride-2 conv, batch norm, ReLU, pool.
    net = tf.keras.layers.ZeroPadding2D(padding=3, name='conv1_pad')(inputs)
    stem_conv = rank1_bnn_layers.Conv2DRank1(
        64,
        kernel_size=7,
        strides=2,
        padding='valid',
        use_bias=False,
        kernel_initializer='he_normal',
        name='conv1',
        **rank1_kwargs())
    net = stem_conv(net)
    net = ed.layers.ensemble_batchnorm(net,
                                       ensemble_size=ensemble_size,
                                       use_tpu=use_tpu,
                                       momentum=BATCH_NORM_DECAY,
                                       epsilon=BATCH_NORM_EPSILON,
                                       name='bn_conv1')
    net = tf.keras.layers.Activation('relu')(net)
    net = tf.keras.layers.MaxPooling2D(3, strides=(2, 2), padding='same')(net)

    # (filters, stage, num_blocks, strides) for the four residual groups.
    stage_specs = (
        ([64, 64, 256], 2, 3, 1),
        ([128, 128, 512], 3, 4, 2),
        ([256, 256, 1024], 4, 6, 2),
        ([512, 512, 2048], 5, 3, 2),
    )
    for stage_filters, stage, num_blocks, stage_strides in stage_specs:
        net = build_group(net, stage_filters, stage=stage,
                          num_blocks=num_blocks, strides=stage_strides)

    # Head: global average pooling followed by a rank-1 dense layer.
    net = tf.keras.layers.GlobalAveragePooling2D(name='avg_pool')(net)
    classifier = rank1_bnn_layers.DenseRank1(
        num_classes,
        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
        activation=None,
        name='fc1000',
        **rank1_kwargs())
    logits = classifier(net)
    return tf.keras.Model(inputs=inputs, outputs=logits, name='resnet50')