Example #1
import numpy as np
import tensorflow as tf
from tensorflow.models.rnn import rnn_cell

def testEmbeddingWrapper(self):
    with self.test_session() as sess:
        with tf.variable_scope("root",
                               initializer=tf.constant_initializer(0.5)):
            x = tf.zeros([1, 1], dtype=tf.int32)
            m = tf.zeros([1, 2])
            g, new_m = rnn_cell.EmbeddingWrapper(rnn_cell.GRUCell(2), 3)(x, m)
            sess.run([tf.initialize_all_variables()])
            res = sess.run([g, new_m], {
                x.name: np.array([[1]]),
                m.name: np.array([[0.1, 0.1]])
            })
            self.assertEqual(res[1].shape, (1, 2))
            # The numbers in res were not hand-computed; this is a smoke test.
            self.assertAllClose(res[0], [[0.17139, 0.17139]])
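For intuition, here is roughly what the wrapper does under the hood. This is a sketch, not the library's exact code: it ignores the "EmbeddingWrapper" variable scope the real class opens, and reuses the test's sizes (3 symbols, 2-unit cell).

import tensorflow as tf
from tensorflow.models.rnn import rnn_cell

# Look the integer ids up in a [3, 2] embedding matrix, then run the
# wrapped cell on the embedded inputs as usual.
ids = tf.zeros([1, 1], dtype=tf.int32)
state = tf.zeros([1, 2])
embedding = tf.get_variable("embedding", [3, 2])
embedded = tf.nn.embedding_lookup(embedding, tf.reshape(ids, [-1]))
output, new_state = rnn_cell.GRUCell(2)(embedded, state)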
Example #2
def embedding_attention_seq2seq(encoder_inputs, decoder_inputs, cell,
                                num_encoder_symbols, num_decoder_symbols,
                                num_heads=1, output_projection=None,
                                feed_previous=False, dtype=tf.float32,
                                scope=None):
  """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x cell.input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step to use for attention later. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  cell.input_size]). Then it runs attention decoder, initialized with the last
  encoder state, on embedded decoder_inputs and attending to encoder outputs.

  Args:
    encoder_inputs: a list of 1D int32 Tensors of shape [batch_size]
      containing encoder symbol ids (this function embeds them itself).
    decoder_inputs: a list of 1D int32 Tensors of shape [batch_size]
      containing decoder symbol ids.
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: integer; number of symbols on the encoder side.
    num_decoder_symbols: integer; number of symbols on the decoder side.
    num_heads: number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [cell.output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and have B added.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".

  Returns:
    outputs: A list of the same length as decoder_inputs of 2D Tensors with
      shape [batch_size x num_decoder_symbols] containing the generated outputs.
    states: The state of each decoder cell in each time-step. This is a list
      with length len(decoder_inputs) -- one item for each time-step.
      Each item is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with tf.variable_scope(scope or "embedding_attention_seq2seq"):
    # Encoder.
    encoder_cell = rnn_cell.EmbeddingWrapper(cell, num_encoder_symbols)
    encoder_outputs, encoder_states = rnn.rnn(
        encoder_cell, encoder_inputs, dtype=dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [tf.reshape(e, [-1, 1, cell.output_size])
                  for e in encoder_outputs]
    attention_states = tf.concat(1, top_states)

    # Decoder.
    output_size = None
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
      output_size = num_decoder_symbols

    if isinstance(feed_previous, bool):
      return embedding_attention_decoder(
          decoder_inputs, encoder_states[-1], attention_states, cell,
          num_decoder_symbols, num_heads, output_size, output_projection,
          feed_previous)
    else:  # If feed_previous is a Tensor, we construct 2 graphs and use cond.
      outputs1, states1 = embedding_attention_decoder(
          decoder_inputs, encoder_states[-1], attention_states, cell,
          num_decoder_symbols, num_heads, output_size, output_projection, True)
      tf.get_variable_scope().reuse_variables()
      outputs2, states2 = embedding_attention_decoder(
          decoder_inputs, encoder_states[-1], attention_states, cell,
          num_decoder_symbols, num_heads, output_size, output_projection, False)

      outputs = tf.cond(feed_previous, lambda: outputs1, lambda: outputs2)
      states = tf.cond(feed_previous, lambda: states1, lambda: states2)
      return outputs, states
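A minimal call sketch for the function above, assuming it is the one shipped in the pre-1.0 tensorflow.models.rnn.seq2seq module; every size below is illustrative rather than taken from the original:

import tensorflow as tf
from tensorflow.models.rnn import rnn_cell, seq2seq

# Illustrative sizes only.
vocab_size, num_steps, num_units, batch_size = 10, 5, 24, 32

cell = rnn_cell.GRUCell(num_units)
enc_inp = [tf.placeholder(tf.int32, [batch_size], name="enc%d" % t)
           for t in range(num_steps)]
dec_inp = [tf.placeholder(tf.int32, [batch_size], name="dec%d" % t)
           for t in range(num_steps)]

# feed_previous=True: after the "GO" symbol, each decoded output is fed
# back in as the next decoder input (greedy decoding).
outputs, states = seq2seq.embedding_attention_seq2seq(
    enc_inp, dec_inp, cell, vocab_size, vocab_size, feed_previous=True)

Because feed_previous is a Python bool here, only one decoder graph is built; passing a scalar bool Tensor instead triggers the cond-based double construction in the else branch above.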
Example #3
import tensorflow as tf
from tensorflow.models.rnn import rnn, rnn_cell

# batch_size, seq_length and vocab_size are assumed values; the original
# snippet uses them without defining them.
batch_size = 64
seq_length = 5
vocab_size = 7
memory_dim = 100

inp = [
    tf.placeholder(tf.int32, shape=(batch_size, ), name="inp%i" % t)
    for t in range(seq_length)
]
labels = [
    tf.placeholder(tf.int32, shape=(batch_size, ), name="labels%i" % t)
    for t in range(seq_length)
]
weights = [tf.ones_like(labels_t, dtype=tf.float32) for labels_t in labels]
prev_mem = tf.zeros((batch_size, memory_dim))

cell = rnn_cell.GRUCell(memory_dim)
cell = rnn_cell.EmbeddingWrapper(cell, vocab_size)

enc_outputs, enc_states = rnn.rnn(cell, inp, dtype=tf.float32)

with tf.variable_scope("RNN/EmbeddingWrapper", reuse=True):
    embeddings = tf.get_variable("embedding")
    inp_embedded = [tf.nn.embedding_lookup(embeddings, inp_t) for inp_t in inp]

cell = rnn_cell.GRUCell(memory_dim)
attn_states = tf.concat(
    1, [tf.reshape(e, [-1, 1, cell.output_size]) for e in enc_outputs])
dec_inp = [
    tf.zeros((batch_size, cell.input_size), dtype=tf.float32)
    for _ in range(seq_length)
]
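From here, one way to finish the graph is to hand the zero-valued decoder inputs, the final encoder state, and the attention states to the library's attention decoder. A sketch, assuming the pre-1.0 tensorflow.models.rnn.seq2seq module; dec_outputs and dec_states are illustrative names:

from tensorflow.models.rnn import seq2seq

# Decode with attention over the encoder outputs, starting from the
# final encoder state.
dec_outputs, dec_states = seq2seq.attention_decoder(
    dec_inp, enc_states[-1], attn_states, cell)

As in Example #2, a real model would usually wrap the decoder cell in rnn_cell.OutputProjectionWrapper(cell, vocab_size) first, so each output row is a vocabulary-sized logit vector.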