def benchmark_layers_core_dropout_overhead(self):

    layer = core.Dropout(0.5)
    x = array_ops.ones((1, 1))

    def fn():
        layer(x, training=True)

    self._run(fn, 10000)
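The snippet above relies on the benchmark harness's self._run; a standalone approximation of the same overhead measurement using only the public tf.keras API (the timing loop and printout are illustrative, not the harness itself) could look like:

import time
import tensorflow as tf

layer = tf.keras.layers.Dropout(0.5)
x = tf.ones((1, 1))

def fn():
    layer(x, training=True)

# Warm up once, then time repeated eager calls, mirroring self._run(fn, 10000).
fn()
iters = 10000
start = time.time()
for _ in range(iters):
    fn()
print('%.2f us per call' % ((time.time() - start) / iters * 1e6))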
Example #2
def mnist_model(input_shape):
  """Creates a MNIST model."""
  model = sequential_model_lib.Sequential()

  # Adding custom pass-through layer to visualize input images.
  model.add(LayerForImageSummary())

  model.add(
      conv_layer_lib.Conv2D(
          32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
  model.add(conv_layer_lib.Conv2D(64, (3, 3), activation='relu'))
  model.add(pool_layer_lib.MaxPooling2D(pool_size=(2, 2)))
  model.add(layer_lib.Dropout(0.25))
  model.add(layer_lib.Flatten())
  model.add(layer_lib.Dense(128, activation='relu'))
  model.add(layer_lib.Dropout(0.5))
  model.add(layer_lib.Dense(NUM_CLASSES, activation='softmax'))

  # Adding custom pass-through layer for summary recording.
  model.add(LayerForHistogramSummary())
  return model
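The model above is written against TensorFlow-internal module aliases (sequential_model_lib, conv_layer_lib, and the custom summary layers). A minimal sketch of the same architecture using only the public tf.keras API, with the summary layers omitted and NUM_CLASSES assumed to be 10, might be:

import tensorflow as tf

NUM_CLASSES = 10  # assumed; the original constant is defined elsewhere in the file

def mnist_model_public(input_shape=(28, 28, 1)):
    """Same convolutional stack as above, built with public tf.keras layers."""
    return tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu',
                               input_shape=input_shape),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Dropout(0.25),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(NUM_CLASSES, activation='softmax'),
    ])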
Example #3
    def testOptimizerWithCallbacks(self):
        np.random.seed(1331)
        input_np = np.random.random((10, 3))
        output_np = np.random.random((10, 4))
        a = input_layer.Input(shape=(3, ), name='input_a')
        model = sequential.Sequential()
        model.add(core.Dense(4, name='dense'))
        model.add(core.Dropout(0.5, name='dropout'))
        model(a)
        optimizer = gradient_descent.SGD(learning_rate=0.1)
        model.compile(optimizer, loss='mse', metrics=['mae'])
        # This does not reduce the LR after the first epoch (due to low delta).
        cbks = [
            callbacks.ReduceLROnPlateau(monitor='val_loss',
                                        factor=0.1,
                                        min_delta=0,
                                        patience=1,
                                        cooldown=5)
        ]
        model.fit(input_np,
                  output_np,
                  batch_size=10,
                  validation_data=(input_np, output_np),
                  callbacks=cbks,
                  epochs=2,
                  verbose=0)
        self.assertAllClose(float(backend.get_value(model.optimizer.lr)),
                            0.1,
                            atol=1e-4)

        # This should reduce the LR after the first epoch (due to high delta).
        cbks = [
            callbacks.ReduceLROnPlateau(monitor='val_loss',
                                        factor=0.1,
                                        min_delta=10,
                                        patience=1,
                                        cooldown=5)
        ]
        model.fit(input_np,
                  output_np,
                  batch_size=10,
                  validation_data=(input_np, output_np),
                  callbacks=cbks,
                  epochs=2,
                  verbose=2)
        self.assertAllClose(float(backend.get_value(model.optimizer.lr)),
                            0.01,
                            atol=1e-4)
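The behavior being asserted, no reduction when min_delta is small and a reduction to factor * lr when min_delta is large, can be reproduced with the public callback API; a minimal sketch with random data and a tiny model, purely illustrative:

import numpy as np
import tensorflow as tf

x = np.random.random((10, 3)).astype('float32')
y = np.random.random((10, 4)).astype('float32')

model = tf.keras.Sequential([tf.keras.layers.Dense(4, input_shape=(3,)),
                             tf.keras.layers.Dropout(0.5)])
model.compile(tf.keras.optimizers.SGD(learning_rate=0.1), loss='mse')

# A huge min_delta means the monitored loss never counts as improved, so the
# learning rate is multiplied by `factor` once `patience` epochs have passed.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.1, min_delta=10, patience=1, cooldown=5)
model.fit(x, y, validation_data=(x, y), epochs=2,
          callbacks=[reduce_lr], verbose=0)
print(float(tf.keras.backend.get_value(model.optimizer.learning_rate)))  # ~0.01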
Example #4
def _build_attention(self, rank):
    """Builds multi-head dot-product attention computations.

    This function builds attributes necessary for `_compute_attention` to
    customize attention computation to replace the default dot-product
    attention.

    Args:
      rank: the rank of query, key, value tensors.
    """
    if self._attention_axes is None:
        self._attention_axes = tuple(range(1, rank - 2))
    else:
        self._attention_axes = tuple(self._attention_axes)
    self._dot_product_equation, self._combine_equation, attn_scores_rank = (
        _build_attention_equation(rank, attn_axes=self._attention_axes))
    norm_axes = tuple(
        range(attn_scores_rank - len(self._attention_axes),
              attn_scores_rank))
    self._softmax = advanced_activations.Softmax(axis=norm_axes)
    self._dropout_layer = core.Dropout(rate=self._dropout)
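_build_attention belongs to Keras' MultiHeadAttention implementation; assuming that context, a short sketch of the public layer shows what the computed attention axes and the Softmax/Dropout built here end up doing. For a rank-3 (batch, seq, dim) input, the projected query/key/value are rank 4, so attention_axes becomes (1,):

import tensorflow as tf

mha = tf.keras.layers.MultiHeadAttention(num_heads=2, key_dim=8, dropout=0.5)
query = tf.random.normal((2, 4, 16))   # (batch, query_seq, dim)
value = tf.random.normal((2, 6, 16))   # (batch, key_seq, dim)
output, scores = mha(query, value, return_attention_scores=True, training=True)
print(output.shape)   # (2, 4, 16)
print(scores.shape)   # (2, 2, 4, 6): (batch, heads, query_seq, key_seq);
                      # the softmax normalizes over the trailing key_seq axis.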
Example #5
    def testOptimizerWithKerasModel(self):
        a = input_layer.Input(shape=(3, ), name='input_a')
        b = input_layer.Input(shape=(3, ), name='input_b')

        dense = core.Dense(4, name='dense')
        c = dense(a)
        d = dense(b)
        e = core.Dropout(0.5, name='dropout')(c)

        model = training.Model([a, b], [d, e])

        optimizer = gradient_descent.SGD(learning_rate=0.001)
        loss = 'mse'
        model.compile(optimizer, loss, metrics=['mae'])

        input_a_np = np.random.random((10, 3))
        input_b_np = np.random.random((10, 3))

        output_d_np = np.random.random((10, 4))
        output_e_np = np.random.random((10, 4))

        model.fit([input_a_np, input_b_np], [output_d_np, output_e_np],
                  epochs=1,
                  batch_size=5)
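The same pattern, one Dense layer applied to two inputs so its weights are shared, written against the public tf.keras API (a simplified sketch of the snippet above, not its test harness):

import numpy as np
import tensorflow as tf

a = tf.keras.Input(shape=(3,), name='input_a')
b = tf.keras.Input(shape=(3,), name='input_b')

dense = tf.keras.layers.Dense(4, name='dense')   # one layer instance, shared
d = dense(b)
e = tf.keras.layers.Dropout(0.5, name='dropout')(dense(a))

model = tf.keras.Model([a, b], [d, e])
model.compile(tf.keras.optimizers.SGD(learning_rate=0.001), loss='mse',
              metrics=['mae'])
model.fit([np.random.random((10, 3)), np.random.random((10, 3))],
          [np.random.random((10, 4)), np.random.random((10, 4))],
          epochs=1, batch_size=5, verbose=0)
# Only the shared Dense kernel and bias are trainable; Dropout adds no weights.
assert len(model.trainable_weights) == 2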
Example #6
class LayerCorrectnessTest(keras_parameterized.TestCase):

  def setUp(self):
    super(LayerCorrectnessTest, self).setUp()
    # Set two virtual CPUs to test MirroredStrategy with multiple devices
    cpus = config_module.list_physical_devices('CPU')
    config_module.set_logical_device_configuration(cpus[0], [
        context.LogicalDeviceConfiguration(),
        context.LogicalDeviceConfiguration(),
    ])

  def _create_model_from_layer(self, layer, input_shapes):
    inputs = [layers.Input(batch_input_shape=s) for s in input_shapes]
    if len(inputs) == 1:
      inputs = inputs[0]
    y = layer(inputs)
    model = models.Model(inputs, y)
    model.compile('sgd', 'mse')
    return model

  @parameterized.named_parameters(
      ('LeakyReLU', advanced_activations.LeakyReLU, (2, 2)),
      ('PReLU', advanced_activations.PReLU, (2, 2)),
      ('ELU', advanced_activations.ELU, (2, 2)),
      ('ThresholdedReLU', advanced_activations.ThresholdedReLU, (2, 2)),
      ('Softmax', advanced_activations.Softmax, (2, 2)),
      ('ReLU', advanced_activations.ReLU, (2, 2)),
      ('Conv1D', lambda: convolutional.Conv1D(2, 2), (2, 2, 1)),
      ('Conv2D', lambda: convolutional.Conv2D(2, 2), (2, 2, 2, 1)),
      ('Conv3D', lambda: convolutional.Conv3D(2, 2), (2, 2, 2, 2, 1)),
      ('Conv2DTranspose', lambda: convolutional.Conv2DTranspose(2, 2),
       (2, 2, 2, 2)),
      ('SeparableConv2D', lambda: convolutional.SeparableConv2D(2, 2),
       (2, 2, 2, 1)),
      ('DepthwiseConv2D', lambda: convolutional.DepthwiseConv2D(2, 2),
       (2, 2, 2, 1)),
      ('UpSampling2D', convolutional.UpSampling2D, (2, 2, 2, 1)),
      ('ZeroPadding2D', convolutional.ZeroPadding2D, (2, 2, 2, 1)),
      ('Cropping2D', convolutional.Cropping2D, (2, 3, 3, 1)),
      ('ConvLSTM2D',
       lambda: convolutional_recurrent.ConvLSTM2D(4, kernel_size=(2, 2)),
       (4, 4, 4, 4, 4)),
      ('Dense', lambda: core.Dense(2), (2, 2)),
      ('Dropout', lambda: core.Dropout(0.5), (2, 2)),
      ('SpatialDropout2D', lambda: core.SpatialDropout2D(0.5), (2, 2, 2, 2)),
      ('Activation', lambda: core.Activation('sigmoid'), (2, 2)),
      ('Reshape', lambda: core.Reshape((1, 4, 1)), (2, 2, 2)),
      ('Permute', lambda: core.Permute((2, 1)), (2, 2, 2)),
      ('Attention', dense_attention.Attention, [(2, 2, 3), (2, 3, 3),
                                                (2, 3, 3)]),
      ('AdditiveAttention', dense_attention.AdditiveAttention, [(2, 2, 3),
                                                                (2, 3, 3),
                                                                (2, 3, 3)]),
      ('Embedding', lambda: embeddings.Embedding(4, 4),
       (2, 4), 2e-3, 2e-3, np.random.randint(4, size=(2, 4))),
      ('LocallyConnected1D', lambda: local.LocallyConnected1D(2, 2), (2, 2, 1)),
      ('LocallyConnected2D', lambda: local.LocallyConnected2D(2, 2),
       (2, 2, 2, 1)),
      ('Add', merge.Add, [(2, 2), (2, 2)]),
      ('Subtract', merge.Subtract, [(2, 2), (2, 2)]),
      ('Multiply', merge.Multiply, [(2, 2), (2, 2)]),
      ('Average', merge.Average, [(2, 2), (2, 2)]),
      ('Maximum', merge.Maximum, [(2, 2), (2, 2)]),
      ('Minimum', merge.Minimum, [(2, 2), (2, 2)]),
      ('Concatenate', merge.Concatenate, [(2, 2), (2, 2)]),
      ('Dot', lambda: merge.Dot(1), [(2, 2), (2, 2)]),
      ('GaussianNoise', lambda: noise.GaussianNoise(0.5), (2, 2)),
      ('GaussianDropout', lambda: noise.GaussianDropout(0.5), (2, 2)),
      ('AlphaDropout', lambda: noise.AlphaDropout(0.5), (2, 2)),
      ('BatchNormalization', normalization_v2.BatchNormalization,
       (2, 2), 1e-2, 1e-2),
      ('LayerNormalization', normalization.LayerNormalization, (2, 2)),
      ('LayerNormalizationUnfused',
       lambda: normalization.LayerNormalization(axis=1), (2, 2, 2)),
      ('MaxPooling2D', pooling.MaxPooling2D, (2, 2, 2, 1)),
      ('AveragePooling2D', pooling.AveragePooling2D, (2, 2, 2, 1)),
      ('GlobalMaxPooling2D', pooling.GlobalMaxPooling2D, (2, 2, 2, 1)),
      ('GlobalAveragePooling2D', pooling.GlobalAveragePooling2D, (2, 2, 2, 1)),
      ('SimpleRNN', lambda: recurrent.SimpleRNN(units=4),
       (4, 4, 4), 1e-2, 1e-2),
      ('GRU', lambda: recurrent.GRU(units=4), (4, 4, 4)),
      ('LSTM', lambda: recurrent.LSTM(units=4), (4, 4, 4)),
      ('GRUV2', lambda: recurrent_v2.GRU(units=4), (4, 4, 4)),
      ('LSTMV2', lambda: recurrent_v2.LSTM(units=4), (4, 4, 4)),
      ('TimeDistributed', lambda: wrappers.TimeDistributed(core.Dense(2)),
       (2, 2, 2)),
      ('Bidirectional',
       lambda: wrappers.Bidirectional(recurrent.SimpleRNN(units=4)), (2, 2, 2)),
      ('AttentionLayerCausal', lambda: dense_attention.Attention(causal=True), [
          (2, 2, 3), (2, 3, 3), (2, 3, 3)
      ]),
      ('AdditiveAttentionLayerCausal',
       lambda: dense_attention.AdditiveAttention(causal=True), [(2, 3, 4),
                                                                (2, 3, 4),
                                                                (2, 3, 4)]),
  )
  def test_layer(self, f32_layer_fn, input_shape, rtol=2e-3, atol=2e-3,
                 input_data=None):
    """Tests a layer by comparing the float32 and mixed precision weights.

    A float32 layer, a mixed precision layer, and a distributed mixed precision
    layer are run. The three layers are identical other than their dtypes and
    distribution strategies. The outputs after predict() and weights after fit()
    are asserted to be close.

    Args:
      f32_layer_fn: A function returning a float32 layer. The other two layers
        will automatically be created from this
      input_shape: The shape of the input to the layer, including the batch
        dimension. Or a list of shapes if the layer takes multiple inputs.
      rtol: The relative tolerance to be asserted.
      atol: The absolute tolerance to be asserted.
      input_data: A Numpy array with the data of the input. If None, input data
        will be randomly generated
    """

    if f32_layer_fn == convolutional.ZeroPadding2D and \
       test.is_built_with_rocm():
      return
    if isinstance(input_shape[0], int):
      input_shapes = [input_shape]
    else:
      input_shapes = input_shape
    strategy = create_mirrored_strategy()
    f32_layer = f32_layer_fn()

    # Create the layers
    assert f32_layer.dtype == f32_layer._compute_dtype == 'float32'
    config = f32_layer.get_config()
    config['dtype'] = policy.Policy('mixed_float16')
    mp_layer = f32_layer.__class__.from_config(config)
    distributed_mp_layer = f32_layer.__class__.from_config(config)

    # Compute per_replica_input_shapes for the distributed model
    global_batch_size = input_shapes[0][0]
    assert global_batch_size % strategy.num_replicas_in_sync == 0, (
        'The number of replicas, %d, does not divide the global batch size of '
        '%d' % (strategy.num_replicas_in_sync, global_batch_size))
    per_replica_batch_size = (
        global_batch_size // strategy.num_replicas_in_sync)
    per_replica_input_shapes = [(per_replica_batch_size,) + s[1:]
                                for s in input_shapes]

    # Create the models
    f32_model = self._create_model_from_layer(f32_layer, input_shapes)
    mp_model = self._create_model_from_layer(mp_layer, input_shapes)
    with strategy.scope():
      distributed_mp_model = self._create_model_from_layer(
          distributed_mp_layer, per_replica_input_shapes)

    # Set all model weights to the same values
    f32_weights = f32_model.get_weights()
    mp_model.set_weights(f32_weights)
    distributed_mp_model.set_weights(f32_weights)

    # Generate input data
    if input_data is None:
      # Cast inputs to float16 to avoid measuring error from having f16 layers
      # cast to float16.
      input_data = [np.random.normal(size=s).astype('float16')
                    for s in input_shapes]
      if len(input_data) == 1:
        input_data = input_data[0]

    # Assert all models have close outputs.
    f32_output = f32_model.predict(input_data)
    mp_output = mp_model.predict(input_data)
    self.assertAllClose(
        mp_output, f32_output, rtol=rtol, atol=atol)
    self.assertAllClose(
        distributed_mp_model.predict(input_data), f32_output, rtol=rtol,
        atol=atol)

    # Run fit() on models
    output = np.random.normal(size=f32_model.outputs[0].shape).astype('float16')
    for model in f32_model, mp_model, distributed_mp_model:
      model.fit(input_data, output, batch_size=global_batch_size)

    # Assert all models have close weights
    f32_weights = f32_model.get_weights()
    self.assertAllClose(
        mp_model.get_weights(), f32_weights, rtol=rtol, atol=atol)
    self.assertAllClose(
        distributed_mp_model.get_weights(), f32_weights, rtol=rtol, atol=atol)
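The core trick in test_layer, rebuilding a float32 layer under a 'mixed_float16' dtype policy from its config and checking that outputs stay close once the weights match, can be sketched in isolation with the public API (a simplified illustration, not the test itself):

import numpy as np
import tensorflow as tf

f32_layer = tf.keras.layers.Dense(2)                       # float32 by default
config = f32_layer.get_config()
config['dtype'] = tf.keras.mixed_precision.Policy('mixed_float16')
mp_layer = tf.keras.layers.Dense.from_config(config)

x = np.random.normal(size=(2, 2)).astype('float16')
y_f32 = f32_layer(x)
mp_layer(x)                                                # build the variables
mp_layer.set_weights(f32_layer.get_weights())              # same float32 weights
y_mp = mp_layer(x)                                         # computed in float16

np.testing.assert_allclose(np.asarray(y_f32),
                           np.asarray(y_mp, dtype='float32'),
                           rtol=2e-3, atol=2e-3)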
Example #7
  def __init__(self,
               units,
               hidden_units,
               feature_columns,
               activation_fn,
               dropout,
               batch_norm,
               name=None,
               **kwargs):
    super(_DNNModelV2, self).__init__(name=name, **kwargs)
    with ops.name_scope(
        'input_from_feature_columns') as input_feature_column_scope:
      layer_name = input_feature_column_scope + 'input_layer'
      if feature_column_lib.is_feature_column_v2(feature_columns):
        self._input_layer = dense_features_v2.DenseFeatures(
            feature_columns=feature_columns, name=layer_name)
      else:
        raise ValueError(
            'Received a feature column from TensorFlow v1, but this is a '
            'TensorFlow v2 Estimator. Please either use v2 feature columns '
            '(accessible via tf.feature_column.* in TF 2.x) with this '
            'Estimator, or switch to a v1 Estimator for use with v1 feature '
            'columns (accessible via tf.compat.v1.estimator.* and '
            'tf.compat.v1.feature_column.*, respectively).')

    self._dropout = dropout
    self._batch_norm = batch_norm

    self._hidden_layers = []
    self._dropout_layers = []
    self._batch_norm_layers = []
    self._hidden_layer_scope_names = []
    for layer_id, num_hidden_units in enumerate(hidden_units):
      with ops.name_scope('hiddenlayer_%d' % layer_id) as hidden_layer_scope:
        # Get scope name without the trailing slash.
        hidden_shared_name = _name_from_scope_name(hidden_layer_scope)
        hidden_layer = keras_core.Dense(
            units=num_hidden_units,
            activation=activation_fn,
            kernel_initializer=tf.compat.v1.glorot_uniform_initializer(),
            name=hidden_shared_name)
        self._hidden_layer_scope_names.append(hidden_shared_name)
        self._hidden_layers.append(hidden_layer)
        if self._dropout is not None:
          dropout_layer = keras_core.Dropout(rate=self._dropout)
          self._dropout_layers.append(dropout_layer)
        if self._batch_norm:
          batch_norm_name = hidden_shared_name + '/batchnorm_%d' % layer_id
          # TODO(scottzhu): Change back to use BatchNormalization when the
          # cleanup is done.
          batch_norm_layer = keras_norm.BatchNormalizationBase(
              # The default momentum 0.99 actually crashes on certain
              # problems, so here we use 0.999, which is the default of
              # tf.contrib.layers.batch_norm.
              momentum=0.999,
              trainable=True,
              name=batch_norm_name)
          self._batch_norm_layers.append(batch_norm_layer)

    with ops.name_scope('logits') as logits_scope:
      logits_shared_name = _name_from_scope_name(logits_scope)
      self._logits_layer = keras_core.Dense(
          units=units,
          activation=None,
          kernel_initializer=tf.compat.v1.glorot_uniform_initializer(),
          name=logits_shared_name)
      self._logits_scope_name = logits_shared_name
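Stripped of the Estimator scaffolding, the layer stack built in this constructor, hidden Dense layers with optional Dropout and BatchNormalization followed by a linear logits layer, can be sketched with public Keras layers (names and defaults here are illustrative, not the original class):

import tensorflow as tf

def build_dnn_stack(hidden_units, units, dropout=None, batch_norm=False):
    """Hidden Dense layers with optional Dropout/BatchNorm, then logits."""
    layers = []
    for num_hidden_units in hidden_units:
        layers.append(tf.keras.layers.Dense(
            num_hidden_units, activation='relu',
            kernel_initializer='glorot_uniform'))
        if dropout is not None:
            layers.append(tf.keras.layers.Dropout(rate=dropout))
        if batch_norm:
            # Mirror the 0.999 momentum chosen above instead of the 0.99 default.
            layers.append(tf.keras.layers.BatchNormalization(momentum=0.999))
    layers.append(tf.keras.layers.Dense(units, activation=None))
    return tf.keras.Sequential(layers)

model = build_dnn_stack(hidden_units=[64, 32], units=10, dropout=0.5,
                        batch_norm=True)
model.build(input_shape=(None, 20))
model.summary()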
Example #8
def _add_soft_max_and_dropout_layers(self):
    self._softmax = partial(softmax, axis=self._norm_axes)
    self._dropout_layer = core.Dropout(rate=self._dropout)
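The helper binds the normalization axes into a plain softmax function with functools.partial; a small standalone illustration of that pattern (the axis and shapes below are just examples):

from functools import partial

import tensorflow as tf

# Bind the axis once so callers can apply the softmax without repeating it.
softmax_over_keys = partial(tf.nn.softmax, axis=-1)

scores = tf.random.normal((2, 4, 6))      # (batch, query_seq, key_seq)
weights = softmax_over_keys(scores)
print(tf.reduce_sum(weights, axis=-1))    # ~1.0 along the key axis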