Example #1
    def __init__(self,
                 num_hidden_layers,
                 dropout=True,
                 dropout_rate=0.2,
                 activation=None,
                 kernel_initializer=None,
                 bias_initializer=tf.zeros_initializer(),
                 kernel_regularizer=None,
                 name=None,
                 reuse=False):

        self.layers = []
        self.num_hidden_layers = num_hidden_layers
        self._name = name if name else 'MLP'
        self._reuse = reuse

        for zb_layer_num, num_hidden_layer in enumerate(num_hidden_layers):
            layer_num = zb_layer_num + 1

            if dropout and layer_num < len(num_hidden_layers):
                self.layers.append(layers_core.Dropout(rate=dropout_rate))
                layer_activation = activation
            else:
                layer_activation = None

            self.layers.append(
                layers_core.Dense(
                    num_hidden_layer,  # Dense output dimension; the input dimension is inferred automatically
                    activation=layer_activation,
                    kernel_initializer=kernel_initializer,
                    bias_initializer=bias_initializer,
                    kernel_regularizer=kernel_regularizer,
                    trainable=True,
                    name='{}/dense_{}'.format(self._name, layer_num),
                    _reuse=reuse))
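
Note: the example above only shows the constructor. Below is a minimal sketch of how the collected self.layers might be applied; this __call__ is an assumption and not part of the original snippet. It reuses the same layers_core import and forwards a training flag so the Dropout layers are only active during training.

    def __call__(self, inputs, training=False):
        # Chain the stacked Dropout/Dense layers in the order they were built.
        outputs = inputs
        for layer in self.layers:
            if isinstance(layer, layers_core.Dropout):
                # Dropout only fires when training=True; otherwise it is a no-op.
                outputs = layer(outputs, training=training)
            else:
                outputs = layer(outputs)
        return outputs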
Example #2
    def call(self, inputs, state):

        if not isinstance(state, tf.contrib.seq2seq.AttentionWrapperState):
            raise TypeError("Expected state to be instance of AttentionWrapperState. "
                            "Received type %s instead." % type(state))
        
        cell_inputs = self._cell_input_fn(inputs, state.attention)
        cell_state = state.cell_state
        cell_output, next_cell_state = self._cell(cell_inputs, cell_state)

        cell_batch_size = (
            cell_output.shape[0].value or tf.shape(cell_output)[0])
        error_message = ('custom error msg:1')
        with tf.control_dependencies(
            self._batch_size_checks(cell_batch_size, error_message)):
            cell_output = tf.identity(
              cell_output, name="checked_cell_output")

        if self._is_multi:
            previous_attention_state = state.attention_state
            previous_alignment_history = state.alignment_history
        else:
            previous_attention_state = [state.attention_state]
            previous_alignment_history = [state.alignment_history]

        all_alignments = []
        all_attentions = []
        all_attention_states = []
        maybe_all_histories = []
        for i, attention_mechanism in enumerate(self._attention_mechanisms):
            attention, alignments, next_attention_state = _compute_attention(
                attention_mechanism, cell_output, previous_attention_state[i],
                self._attention_layers[i] if self._attention_layers else None,
                self._attention_dropout_layers[i] if self._attention_dropout_layers else None,
                self.training)
            alignment_history = previous_alignment_history[i].write(
                state.time, alignments) if self._alignment_history else ()

            all_attention_states.append(next_attention_state)
            all_alignments.append(alignments)
            all_attentions.append(attention)
            maybe_all_histories.append(alignment_history)

        attention = tf.concat(all_attentions, 1)
        
        next_state = tf.contrib.seq2seq.AttentionWrapperState(
            time=state.time + 1,
            cell_state=next_cell_state,
            attention=attention,
            attention_state=self._item_or_tuple(all_attention_states),
            alignments=self._item_or_tuple(all_alignments),
            alignment_history=self._item_or_tuple(maybe_all_histories))

        outputs = layers_core.Dropout(rate=1 - self.keep_prob)(
            self.attention_layer(tf.concat([cell_output, attention], 1)),
            training=self.training)
        
        if self._output_attention:
            return outputs, next_state
        else:
            raise NotImplementedError()
Example #3
 def testCustomNoiseShape(self):
     inputs = array_ops.ones((5, 3, 2))
     noise_shape = [5, 1, 2]
     dp = core_layers.Dropout(0.5, noise_shape=noise_shape, seed=1)
     dropped = dp.apply(inputs, training=True)
     self.evaluate(variables.global_variables_initializer())
     np_output = self.evaluate(dropped)
     self.assertAlmostEqual(0., np_output.min())
     self.assertAllClose(np_output[:, 0, :], np_output[:, 1, :])
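
The noise_shape of [5, 1, 2] means the keep/drop mask is sampled once per (batch, feature) position and broadcast along the middle axis, which is why the two slices compared above must be identical. A minimal sketch of the same broadcast behaviour with the underlying op, assuming TF 1.x and import tensorflow as tf:

mask_shared = tf.nn.dropout(tf.ones((5, 3, 2)), keep_prob=0.5,
                            noise_shape=[5, 1, 2])  # one decision per [batch, feature], reused across axis 1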
Example #4
 def testCustomNoiseShape(self):
     with self.test_session() as sess:
         inputs = tf.ones((5, 3, 2))
         noise_shape = [5, 1, 2]
         dp = core_layers.Dropout(0.5, noise_shape=noise_shape, seed=1)
         dropped = dp.apply(inputs, training=True)
         sess.run(tf.global_variables_initializer())
         np_output = sess.run(dropped)
         self.assertAlmostEqual(0., np_output.min())
         self.assertAllClose(np_output[:, 0, :], np_output[:, 1, :])
Example #5
 def testDynamicRate(self):
     with self.cached_session() as sess:
         rate = array_ops.placeholder(dtype='float32', name='rate')
         dp = core_layers.Dropout(rate, name='dropout')
         inputs = array_ops.ones((5, 5))
         dropped = dp.apply(inputs, training=True)
         self.evaluate(variables.global_variables_initializer())
         np_output = sess.run(dropped, feed_dict={rate: 0.5})
         self.assertAlmostEqual(0., np_output.min())
         np_output = sess.run(dropped, feed_dict={rate: 0.0})
         self.assertAllClose(np.ones((5, 5)), np_output)
Example #6
 def testDynamicLearningPhase(self):
     with self.cached_session() as sess:
         dp = core_layers.Dropout(0.5, seed=1)
         inputs = array_ops.ones((5, 5))
         training = array_ops.placeholder(dtype='bool')
         dropped = dp.apply(inputs, training=training)
         self.evaluate(variables.global_variables_initializer())
         np_output = sess.run(dropped, feed_dict={training: True})
         self.assertAlmostEqual(0., np_output.min())
         np_output = sess.run(dropped, feed_dict={training: False})
         self.assertAllClose(np.ones((5, 5)), np_output)
Example #7
 def testBooleanLearningPhase(self):
     dp = core_layers.Dropout(0.5)
     inputs = array_ops.ones((5, 3))
     dropped = dp.apply(inputs, training=True)
     if not context.executing_eagerly():
         self.evaluate(variables.global_variables_initializer())
     np_output = self.evaluate(dropped)
     self.assertAlmostEqual(0., np_output.min())
     dropped = dp.apply(inputs, training=False)
     np_output = self.evaluate(dropped)
     self.assertAllClose(np.ones((5, 3)), np_output)
Example #8
 def testBooleanLearningPhase(self):
     with self.test_session() as sess:
         dp = core_layers.Dropout(0.5)
         inputs = array_ops.ones((5, 3))
         dropped = dp.apply(inputs, training=True)
         sess.run(variables.global_variables_initializer())
         np_output = sess.run(dropped)
         self.assertAlmostEqual(0., np_output.min())
         dropped = dp.apply(inputs, training=False)
         np_output = sess.run(dropped)
         self.assertAllClose(np.ones((5, 3)), np_output)
Example #9
 def testTrainingLayer(self):
     net = network.Sequential([core.Dropout(0.99999)])
     two = constant_op.constant(2.0)
     self.assertEqual(2.0, net(two).numpy())
     self.assertEqual(2.0, net(two, training=False).numpy())
     for _ in range(20):
         with_dropout = net(two, training=True).numpy()
         self.assertIn(with_dropout, [0.0, 2.0])
         if with_dropout == 0.0:
             return
     # Should only fail spuriously 1 in 10^100 runs.
     self.fail("Didn't see dropout happen after 20 tries.")
Example #10
def dropout(keep_prob=0.5, noise_shape=None, name=None):
    """Returns a dropout op applied to the input.
    With probability `keep_prob`, outputs the input element scaled up by
    `1 / keep_prob`, otherwise outputs `0`.  The scaling is so that the
    expected sum is unchanged.
    Args:
    inputs: The tensor to pass to the nn.dropout op.
    keep_prob: A scalar `Tensor` with the same type as x. The probability
      that each element is kept.
    noise_shape: A 1-D `Tensor` of type `int32`, representing the
      shape for randomly generated keep/drop flags.
    is_training: A bool `Tensor` indicating whether or not the model
      is in training mode. If so, dropout is applied and values scaled.
      Otherwise, inputs is returned.
    outputs_collections: Collection to add the outputs.
    scope: Optional scope for name_scope.
    Returns:
    A tensor representing the output of the operation.
    """
    layer = core_layers.Dropout(rate=1 - keep_prob,
                                noise_shape=noise_shape,
                                name=name,
                                _scope=name)
    return layer
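
The helper only constructs the layer; applying it is left to the caller. A minimal usage sketch, assuming import tensorflow as tf in addition to the core_layers alias above: with keep_prob=0.8, surviving elements are scaled by 1 / 0.8 = 1.25, which keeps the expected sum unchanged.

drop_layer = dropout(keep_prob=0.8, name='my_dropout')
inputs = tf.ones((4, 10))
train_out = drop_layer.apply(inputs, training=True)   # elements kept with p=0.8, scaled by 1.25
eval_out = drop_layer.apply(inputs, training=False)   # identity outside training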
Example #11
 def testDropoutProperties(self):
     dp = core_layers.Dropout(0.5, name='dropout')
     self.assertEqual(dp.rate, 0.5)
     self.assertEqual(dp.noise_shape, None)
     dp.apply(array_ops.ones(()))
     self.assertEqual(dp.name, 'dropout')
Example #12
  def __init__(self,
               units,
               hidden_units,
               feature_columns,
               activation_fn,
               dropout,
               input_layer_partitioner,
               batch_norm,
               name=None,
               **kwargs):
    super(_DNNModel, self).__init__(name=name, **kwargs)
    if feature_column_lib.is_feature_column_v2(feature_columns):
      self._input_layer = feature_column_lib.DenseFeatures(
          feature_columns=feature_columns, name='input_layer')
    else:
      self._input_layer = feature_column.InputLayer(
          feature_columns=feature_columns,
          name='input_layer',
          create_scope_now=False)

    self._add_layer(self._input_layer, 'input_layer')

    self._dropout = dropout
    self._batch_norm = batch_norm

    self._hidden_layers = []
    self._dropout_layers = []
    self._batch_norm_layers = []
    self._hidden_layer_scope_names = []
    for layer_id, num_hidden_units in enumerate(hidden_units):
      with variable_scope.variable_scope(
          'hiddenlayer_%d' % layer_id) as hidden_layer_scope:
        hidden_layer = core_layers.Dense(
            units=num_hidden_units,
            activation=activation_fn,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=hidden_layer_scope,
            _scope=hidden_layer_scope)
        self._add_layer(hidden_layer, hidden_layer_scope.name)
        self._hidden_layer_scope_names.append(hidden_layer_scope.name)
        self._hidden_layers.append(hidden_layer)
        if self._dropout is not None:
          dropout_layer = core_layers.Dropout(rate=self._dropout)
          self._add_layer(dropout_layer, dropout_layer.name)
          self._dropout_layers.append(dropout_layer)
        if self._batch_norm:
          batch_norm_layer = normalization.BatchNormalization(
              # The default momentum 0.99 actually crashes on certain
              # problems, so here we use 0.999, which is the default of
              # tf.contrib.layers.batch_norm.
              momentum=0.999,
              trainable=True,
              name='batchnorm_%d' % layer_id,
              _scope='batchnorm_%d' % layer_id)
          self._add_layer(batch_norm_layer, batch_norm_layer.name)
          self._batch_norm_layers.append(batch_norm_layer)

    with variable_scope.variable_scope('logits') as logits_scope:
      self._logits_layer = core_layers.Dense(
          units=units,
          activation=None,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=logits_scope,
          _scope=logits_scope)
      self._add_layer(self._logits_layer, logits_scope.name)
      self._logits_scope_name = logits_scope.name
    self._input_layer_partitioner = input_layer_partitioner
Example #13
 def testDropoutProperties(self):
     dp = core_layers.Dropout(0.5)
     self.assertEqual(dp.rate, 0.5)
     self.assertEqual(dp.name, 'dropout')
     self.assertEqual(dp.noise_shape, None)
Example #14
    def __init__(self,
                 units,
                 hidden_units,
                 feature_columns,
                 activation_fn,
                 dropout,
                 batch_norm,
                 name=None,
                 **kwargs):
        super(_DNNModelV2, self).__init__(name=name, **kwargs)

        # Add this name_scope for backward compatibility, as it was previously
        # used in variable_scope.
        with ops.name_scope(
                'input_from_feature_columns') as input_feature_column_scope:
            layer_name = input_feature_column_scope + 'input_layer'
            if feature_column_lib.is_feature_column_v2(feature_columns):
                self._input_layer = feature_column_lib.DenseFeatures(
                    feature_columns=feature_columns, name=layer_name)
            else:
                self._input_layer = feature_column.InputLayer(
                    feature_columns=feature_columns,
                    name=layer_name,
                    create_scope_now=False)

        self._add_layer(self._input_layer, self._input_layer.name)

        self._dropout = dropout
        self._batch_norm = batch_norm

        self._hidden_layers = []
        self._dropout_layers = []
        self._batch_norm_layers = []
        self._hidden_layer_scope_names = []
        for layer_id, num_hidden_units in enumerate(hidden_units):
            with ops.name_scope('hiddenlayer_%d' %
                                layer_id) as hidden_layer_scope:
                # Get scope name without the trailing slash.
                hidden_shared_name = _name_from_scope_name(hidden_layer_scope)
                hidden_layer = core_layers.Dense(
                    units=num_hidden_units,
                    activation=activation_fn,
                    kernel_initializer=init_ops.glorot_uniform_initializer(),
                    name=hidden_shared_name)
                self._add_layer(hidden_layer, hidden_shared_name)
                self._hidden_layer_scope_names.append(hidden_shared_name)
                self._hidden_layers.append(hidden_layer)
                if self._dropout is not None:
                    dropout_layer = core_layers.Dropout(rate=self._dropout)
                    self._add_layer(dropout_layer, dropout_layer.name)
                    self._dropout_layers.append(dropout_layer)
                if self._batch_norm:
                    batch_norm_name = hidden_shared_name + '/batchnorm_%d' % layer_id
                    batch_norm_layer = normalization.BatchNormalization(
                        # The default momentum 0.99 actually crashes on certain
                        # problems, so here we use 0.999, which is the default of
                        # tf.contrib.layers.batch_norm.
                        momentum=0.999,
                        trainable=True,
                        name=batch_norm_name)
                    self._add_layer(batch_norm_layer, batch_norm_name)
                    self._batch_norm_layers.append(batch_norm_layer)

        with ops.name_scope('logits') as logits_scope:
            logits_shared_name = _name_from_scope_name(logits_scope)
            self._logits_layer = core_layers.Dense(
                units=units,
                activation=None,
                kernel_initializer=init_ops.glorot_uniform_initializer(),
                name=logits_shared_name)
            self._add_layer(self._logits_layer, logits_shared_name)
            self._logits_scope_name = logits_shared_name
Example #15
    def __init__(self, cell, attention_mechanism, keep_prob, training,
                 attention_layer_size=None, alignment_history=False,
                 cell_input_fn=None, output_attention=True,
                 initial_cell_state=None, name=None):
        super(MyAttentionWrapper, self).__init__(
            cell, attention_mechanism, attention_layer_size, alignment_history,
            cell_input_fn, output_attention, initial_cell_state, name)
        
        self.keep_prob = keep_prob
        self.training = training
        
        super(tf.contrib.seq2seq.AttentionWrapper, self).__init__(name=name)
        if not rnn_cell_impl._like_rnncell(cell):  # pylint: disable=protected-access
            raise TypeError("cell must be an RNNCell, saw type: %s" % type(cell).__name__)
        if isinstance(attention_mechanism, (list, tuple)):
            self._is_multi = True
            attention_mechanisms = attention_mechanism
            for attention_mechanism in attention_mechanisms:
                if not isinstance(attention_mechanism, tf.contrib.seq2seq.AttentionMechanism):
                    raise TypeError("attention_mechanism must contain only instances of "
                                    "AttentionMechanism, saw type: %s" % type(attention_mechanism).__name__)
        else:
            self._is_multi = False
            if not isinstance(attention_mechanism, tf.contrib.seq2seq.AttentionMechanism):
                raise TypeError("attention_mechanism must be an AttentionMechanism or list of "
                                "multiple AttentionMechanism instances, saw type: %s" % type(attention_mechanism).__name__)
            attention_mechanisms = (attention_mechanism,)

        if cell_input_fn is None:
            cell_input_fn = (lambda inputs, attention: tf.concat([inputs, attention], -1))
        else:
            if not callable(cell_input_fn):
                raise TypeError("cell_input_fn must be callable, saw type: %s" % type(cell_input_fn).__name__)

        if attention_layer_size is not None:
            attention_layer_sizes = tuple(
                attention_layer_size if isinstance(attention_layer_size, (list, tuple))
                else (attention_layer_size,))
            if len(attention_layer_sizes) != len(attention_mechanisms):
                raise ValueError(
                    "If provided, attention_layer_size must contain exactly one "
                    "integer per attention_mechanism, saw: %d vs %d"
                    % (len(attention_layer_sizes), len(attention_mechanisms)))
            self._attention_layers = tuple(
                layers_core.Dense(attention_layer_size, name="attention_layer",
                                  use_bias=True, activation=tf.tanh)
                for attention_layer_size in attention_layer_sizes)
            self._attention_dropout_layers = tuple(
                layers_core.Dropout(rate=1 - self.keep_prob,
                                    name="attention_dropout_layer")
                for attention_layer_size in attention_layer_sizes)
            self._attention_layer_size = sum(attention_layer_sizes)
        else:
            self._attention_layers = None
            self._attention_dropout_layers = None
            self._attention_layer_size = sum(
                attention_mechanism.values.get_shape()[-1].value
                for attention_mechanism in attention_mechanisms)
            
        self._cell = cell
        self._attention_mechanisms = attention_mechanisms
        self._cell_input_fn = cell_input_fn
        self._output_attention = output_attention
        self._alignment_history = alignment_history
        with tf.name_scope(name, "AttentionWrapperInit"):
            if initial_cell_state is None:
                self._initial_cell_state = None
            else:
                final_state_tensor = nest.flatten(initial_cell_state)[-1]
                state_batch_size = (final_state_tensor.shape[0].value or tf.shape(final_state_tensor)[0])
                error_message = ('custom error msg:0')
                with tf.control_dependencies(
                    self._batch_size_checks(state_batch_size, error_message)):
                    self._initial_cell_state = nest.map_structure(
                        lambda s: tf.identity(s, name="check_initial_cell_state"),
                        initial_cell_state)
        self.attention_layer = layers_core.Dense(512, activation=tf.tanh)
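
A minimal construction sketch for the wrapper, assuming TF 1.x graph mode with import tensorflow as tf, and that MyAttentionWrapper (this example) together with the call method from Example #2 live in the same module; the encoder placeholders and the size 256 are illustrative assumptions, not part of the original code.

encoder_outputs = tf.placeholder(tf.float32, [None, None, 256])  # [batch, src_len, depth]
source_lengths = tf.placeholder(tf.int32, [None])

attention_mechanism = tf.contrib.seq2seq.LuongAttention(
    num_units=256, memory=encoder_outputs,
    memory_sequence_length=source_lengths)

decoder_cell = MyAttentionWrapper(
    tf.nn.rnn_cell.LSTMCell(256),
    attention_mechanism,
    keep_prob=0.8,              # drives the rate=1 - keep_prob Dropout layers above
    training=True,              # forwarded to the Dropout calls in Example #2
    attention_layer_size=256)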