Example #1
def embedding_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                          num_encoder_symbols, num_decoder_symbols,
                          embedding_size, output_projection=None,
                          feed_previous=False, dtype=dtypes.float32,
                          scope=None, beam_search=True, beam_size=10):
  """Embedding RNN sequence-to-sequence model.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  input_size]). Then it runs RNN decoder, initialized with the last
  encoder state, on embedded decoder_inputs.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial state for both the encoder and decoder
      rnn cells (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_rnn_seq2seq".
    beam_search: Boolean; whether the decoder should use beam search.
    beam_size: Integer; the beam width used when beam_search is True.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell in each time-step. This is a list
        with length len(decoder_inputs) -- one item for each time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with variable_scope.variable_scope(scope or "embedding_rnn_seq2seq"):
    # Encoder.
    encoder_cell = rnn_cell.EmbeddingWrapper(
        cell, embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    _, encoder_state = core_rnn.static_rnn(encoder_cell, encoder_inputs, dtype=dtype)

    # Decoder.
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)


    return embedding_rnn_decoder(
          decoder_inputs, encoder_state, cell, num_decoder_symbols,
          embedding_size, output_projection=output_projection,
          feed_previous=feed_previous, beam_search=beam_search, beam_size=beam_size)
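
A minimal usage sketch for the function above (assumes TensorFlow 1.x graph mode and that this module's modified embedding_rnn_decoder with beam-search support is importable; all names and sizes below are illustrative):

import tensorflow as tf

# Hypothetical sequence lengths, vocabulary size, and hidden size.
enc_len, dec_len, vocab, hidden = 10, 12, 20000, 128

encoder_inputs = [tf.placeholder(tf.int32, [None], name='enc%d' % i)
                  for i in range(enc_len)]
decoder_inputs = [tf.placeholder(tf.int32, [None], name='dec%d' % i)
                  for i in range(dec_len)]
cell = tf.nn.rnn_cell.GRUCell(hidden)

# feed_previous=True decodes from the "GO" symbol; beam_search/beam_size are
# simply forwarded to the modified embedding_rnn_decoder.
outputs, state = embedding_rnn_seq2seq(
    encoder_inputs, decoder_inputs, cell,
    num_encoder_symbols=vocab, num_decoder_symbols=vocab,
    embedding_size=hidden, feed_previous=True,
    beam_search=True, beam_size=5)
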
Example #2
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                output_projection=None,
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False,
                                beam_search=True,
                                beam_size=10):
    with variable_scope.variable_scope(scope or "embedding_attention_seq2seq",
                                       dtype=dtype) as scope:
        dtype = scope.dtype
        # Encoder.
        encoder_cell = copy.deepcopy(cell)
        encoder_cell = core_rnn_cell.EmbeddingWrapper(
            encoder_cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        encoder_outputs, encoder_state = rnn.static_rnn(encoder_cell,
                                                        encoder_inputs,
                                                        dtype=dtype)

        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [
            array_ops.reshape(e, [-1, 1, cell.output_size])
            for e in encoder_outputs
        ]
        attention_states = array_ops.concat(top_states, 1)

        # Decoder.
        output_size = None
        if output_projection is None:
            cell = core_rnn_cell.OutputProjectionWrapper(
                cell, num_decoder_symbols)
            output_size = num_decoder_symbols

        return embedding_attention_decoder(
            decoder_inputs,
            encoder_state,
            attention_states,
            cell,
            num_decoder_symbols,
            embedding_size,
            num_heads=num_heads,
            output_size=output_size,
            output_projection=output_projection,
            feed_previous=feed_previous,
            initial_state_attention=initial_state_attention,
            beam_search=beam_search,
            beam_size=beam_size)
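
The reshape-and-concat above stacks the per-step encoder outputs along a new time axis to form the attention memory. A small standalone check of the same pattern (illustrative sizes, written with tf.reshape/tf.concat rather than the array_ops aliases):

import tensorflow as tf

batch_size, output_size, steps = 32, 128, 10
encoder_outputs = [tf.zeros([batch_size, output_size]) for _ in range(steps)]
top_states = [tf.reshape(e, [-1, 1, output_size]) for e in encoder_outputs]
attention_states = tf.concat(top_states, 1)
print(attention_states.shape)  # (32, 10, 128) == [batch, num_steps, output_size]
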
Example #3
    def embedding_attention_bidirectional_seq2seq(self, encoder_inputs, decoder_inputs, input_cell1, input_cell2,
                                                  output_cell,
                                                  num_encoder_symbols,
                                                  num_decoder_symbols, embedding_size, num_heads=4,
                                                  output_projection=None, feed_previous=False, dtype=None, scope=None,
                                                  initial_state_attention=False):

        with tf.variable_scope(scope or "embedding_attention_bidirectional_seq2seq") as scope:
            # Encoder.
            encoder_cell1 = core_rnn_cell.EmbeddingWrapper(input_cell1, embedding_classes=num_encoder_symbols,
                                                           embedding_size=embedding_size)
            encoder_cell2 = core_rnn_cell.EmbeddingWrapper(input_cell2, embedding_classes=num_encoder_symbols,
                                                           embedding_size=embedding_size)

            encoder_outputs, encoder_state1, encoder_state2 = core_rnn.static_bidirectional_rnn(encoder_cell1,
                                                                                                encoder_cell2,
                                                                                                encoder_inputs,
                                                                                                dtype=tf.float32)

            # First calculate a concatenation of encoder outputs to put attention on.
            top_states = [array_ops.reshape(e, [-1, 1, input_cell1.output_size + input_cell2.output_size]) for e in
                          encoder_outputs]

            attention_states = array_ops.concat(top_states, 1)

            # Combine the states of both encoders (tuple states are
            # concatenated; plain tensor states are summed elementwise).
            encoder_state = encoder_state1 + encoder_state2

            # Decoder.
            output_size = None
            if output_projection is None:
                output_cell = rnn.OutputProjectionWrapper(output_cell, num_decoder_symbols)
                output_size = num_decoder_symbols

            assert isinstance(feed_previous, bool)
            return seq2seq.embedding_attention_decoder(decoder_inputs, encoder_state, attention_states,
                                                       output_cell,
                                                       num_decoder_symbols, embedding_size, num_heads=num_heads,
                                                       output_size=output_size,
                                                       output_projection=output_projection,
                                                       feed_previous=feed_previous,
                                                       initial_state_attention=initial_state_attention)
Example #4
def my_encoder(encoder_inputs,
               cell,
               num_encoder_symbols,
               embedding_size,
               dtype=None,
               scope=None):
    with variable_scope.variable_scope(scope or "embedding_attention_seq2seq",
                                       dtype=dtype) as scope:
        dtype = scope.dtype
        # Encoder.
        encoder_cell = core_rnn_cell.EmbeddingWrapper(
            cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        encoder_outputs, encoder_state = rnn.static_rnn(encoder_cell,
                                                        encoder_inputs,
                                                        dtype=dtype)
    return encoder_outputs, encoder_state
Example #5
    def __init__(self, size, num_layers, vocab_size, buckets):
        self.__name__ = 'StepGAN'
        self.cell = tf.nn.rnn_cell.MultiRNNCell(
            [tf.nn.rnn_cell.GRUCell(size) for _ in range(num_layers)])
        self.enc_cell = tf.nn.rnn_cell.MultiRNNCell(
            [tf.nn.rnn_cell.GRUCell(size) for _ in range(num_layers)])
        self.enc_cell = core_rnn_cell.EmbeddingWrapper(
            cell=self.enc_cell,
            embedding_classes=vocab_size,
            embedding_size=size)
        self.embedding = variable_scope.get_variable('embedding',
                                                     [vocab_size, size])
        self.D_W = tf.Variable(xavier_init([size * num_layers, 1]))
        self.D_b = tf.Variable(tf.zeros(shape=[1]))
        self.real_data = [
            tf.placeholder(tf.int32,
                           shape=[None],
                           name='realdata{0}'.format(i))
            for i in range(buckets[-1][1])
        ]
Example #6
def embedding_rnn_encoder(encoder_inputs,
                          cell,
                          num_symbols,
                          embedding_size,
                          scope=None,
                          dtype=None):
    with variable_scope.variable_scope(scope or "embedding_rnn_encoder", dtype=dtype) as scope:
        dtype = scope.dtype

        # Note that we use a deep copy of the original cell
        encoder_cell = copy.deepcopy(cell)
        encoder_cell = core_rnn_cell.EmbeddingWrapper(
            encoder_cell,
            embedding_classes=num_symbols,
            embedding_size=embedding_size)
        encoder_outputs, encoder_state = rnn.static_rnn(
            encoder_cell, encoder_inputs, dtype=dtype)

        top_states = [array_ops.reshape(e, [-1, 1, cell.output_size]) for e in encoder_outputs]
        attention_states = array_ops.concat(top_states, 1)

        return encoder_state, attention_states
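
A usage sketch for this encoder (assumes TensorFlow 1.x graph mode and the module-level imports used above; the sequence length, vocabulary size, and cell size are placeholders):

import tensorflow as tf

encoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(15)]
cell = tf.nn.rnn_cell.GRUCell(256)

encoder_state, attention_states = embedding_rnn_encoder(
    encoder_inputs, cell,
    num_symbols=30000, embedding_size=256, dtype=tf.float32)
# encoder_state: final encoder state; attention_states: [batch, 15, 256].
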
Example #7
def my_inf_encoder(decoder_inputs,
                   cell,
                   num_encoder_symbols,
                   embedding_size,
                   dtype=None,
                   scope=None):
    with variable_scope.variable_scope(scope or "embedding_attention_seq2seq",
                                       dtype=dtype,
                                       reuse=True) as scope:
        dtype = scope.dtype
        # Encoder.
        encoder_cell = core_rnn_cell.EmbeddingWrapper(
            cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
    # don't share the parameters with prior encoder
    with variable_scope.variable_scope("embedding_inf_network",
                                       dtype=dtype) as scope:
        dtype = scope.dtype
        encoder_outputs, encoder_state = rnn.static_rnn(encoder_cell,
                                                        decoder_inputs,
                                                        dtype=dtype)
    return encoder_outputs, encoder_state
Example #8
    def __init__(
            self,
            method,
            model,
            kdim,
            edim,
            kbembed_size,
            triples_num,
            size,
            num_layers,
            vocab_size,
            buckets,
            hops_num=1,  #TODO
            kgpath_len=1,  #TODO
            learning_rate=0.5,
            learning_rate_decay_factor=0.99,
            max_gradient_norm=5.0,
            feed_prev=False,
            batch_size=32,
            dtype=tf.float32):

        model_funcs = importlib.import_module('models.' + model)
        globals().update(model_funcs.__dict__)

        # for knowledge graph
        self.kdim = kdim
        self.edim = edim
        self.kbembed_size = kbembed_size
        self.triples_num = triples_num
        self.hops_num = hops_num  #TODO
        self.kgpath_len = kgpath_len  #TODO

        # basic
        self.size = size
        self.num_layers = num_layers
        self.vocab_size = vocab_size
        print('VOCABSIZE:{}'.format(vocab_size))
        self.buckets = buckets
        self.feed_prev = feed_prev
        self.batch_size = batch_size
        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         dtype=dtype)
        self.op_lr_decay = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)

        # main model
        self.cell = tf.nn.rnn_cell.MultiRNNCell(
            [tf.nn.rnn_cell.GRUCell(size) for _ in range(num_layers)])
        self.enc_cell = tf.nn.rnn_cell.MultiRNNCell(
            [tf.nn.rnn_cell.GRUCell(size) for _ in range(num_layers)])
        self.enc_cell = core_rnn_cell.EmbeddingWrapper(
            cell=self.enc_cell,
            embedding_classes=vocab_size,
            embedding_size=size)
        # input embedding
        self.embedding = variable_scope.get_variable('embedding',
                                                     [vocab_size, size])

        # encoder's placeholder
        self.encoder_inputs = []
        for bid in range(buckets[-1][0]):
            self.encoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name='encoder{0}'.format(bid)))
        self.seq_len = tf.placeholder(tf.int32,
                                      shape=[None],
                                      name='enc_seq_len')
        # decoder's placeholder
        self.decoder_inputs = []
        self.targets = []
        self.target_weights = []
        self.masks = []
        for bid in range(buckets[-1][1] + 1):
            self.decoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name='decoder{0}'.format(bid)))
            self.targets.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name='target{0}'.format(bid)))
            self.target_weights.append(
                tf.placeholder(tf.float32,
                               shape=[None],
                               name='weight{0}'.format(bid)))
            self.masks.append(
                tf.placeholder(tf.float32,
                               shape=[None],
                               name='mask_unit{0}'.format(bid)))

        # TODO passed args funcs
        self.output_projection = build_out_proj(size, vocab_size, kdim)
        self.kg_projection = build_kg_proj(size, kdim)
        self.memA, self.memC = build_memnet(size, num_layers, kbembed_size,
                                            xavier_init)
        self.Tpred_W, self.Tpred_b = build_transit_mat(size, kdim, edim,
                                                       xavier_init)
        self.S, self.neA = hold_graph(kdim, edim, dtype)
        self.facts = hold_facts(triples_num, kbembed_size, dtype)
        self.kg_indices = hold_kg_indices()
        more_args = (self.Tpred_W, self.Tpred_b, self.kdim, self.edim,
                     self.neA, self.S, self.hops_num, self.kgpath_len,
                     self.kg_projection)
        mem_args = (self.batch_size, self.size, self.num_layers, self.hops_num,
                    self.facts, self.kg_indices, self.memA, self.memC)

        if method == 'TRAIN':

            self.enc_state = []
            self.losses = []
            self.logits = []

            self.decKB_losses = []
            self.decN_losses = []
            self.ptr_losses = []
            self.outputs = []
            self.a1s = []
            self.kdists = []
            self.Ndists = []
            self.Rdebugs = []

            for j, bucket in enumerate(buckets):

                with variable_scope.variable_scope(
                        variable_scope.get_variable_scope(),
                        reuse=True if j > 0 else None):

                    _, enc_state = \
                        encode(self.enc_cell, self.encoder_inputs[:bucket[0]], self.seq_len)
                    enc_state = enc_state_transform(enc_state, mem_args)

                    logits, hiddens, dec_state = \
                        decode(self.cell, enc_state, \
                               self.vocab_size, self.embedding, \
                               self.decoder_inputs[:bucket[1]], \
                               self.output_projection, \
                               bucket[1]+1, more_args, \
                               None, feed_prev=False, \
                               copy_transform=copy_transform)

                    outputs, a1s, kdists, Ndists, final_logits, Rdebug = copymech(
                        logits, self.output_projection, self.vocab_size,
                        self.kdim, more_args, mem_args, copy_transform)
                    loss = compute_loss(final_logits, self.targets[:bucket[1]],
                                        self.target_weights[:bucket[1]],
                                        self.output_projection,
                                        self.vocab_size)

                    self.enc_state.append(enc_state)
                    self.losses.append(loss)
                    self.logits.append(logits)

                    self.outputs.append(outputs)
                    self.a1s.append(a1s)
                    self.kdists.append(kdists)
                    self.Ndists.append(Ndists)
                    self.Rdebugs.append(Rdebug)

            # TODO check
            self.softmax_outputs, self.argmax_outputs = to_check(
                self.logits, self.outputs, self.output_projection)

            # update methods
            self.op_update = []
            optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
            params = tf.trainable_variables()
            print(params)
            for j in range(len(self.buckets)):
                gradients = tf.gradients(self.losses[j], params)
                clipped_gradients, _ = tf.clip_by_global_norm(
                    gradients, max_gradient_norm)
                self.op_update.append(
                    optimizer.apply_gradients(zip(clipped_gradients, params),
                                              global_step=self.global_step))

        elif method == 'TEST':
            self.enc_state = []
            self.argmax_outputs = []
            self.logits = []

            self.a1s = []
            self.kdists = []
            self.Ndists = []
            self.Rdebugs = []

            for j, bucket in enumerate(buckets):

                with variable_scope.variable_scope(
                        variable_scope.get_variable_scope(),
                        reuse=True if j > 0 else None):

                    _, enc_state = \
                        encode(self.enc_cell, self.encoder_inputs[:bucket[0]], self.seq_len)
                    enc_state = enc_state_transform(enc_state, mem_args)

                    logits, argmax_outputs, hiddens, a1s, kdists, Ndists, Rdebugs = \
                        decode(self.cell, enc_state, \
                               self.vocab_size, self.embedding, \
                               self.decoder_inputs[:bucket[1]], \
                               self.output_projection, \
                               bucket[1], more_args, \
                               mem_args, feed_prev=True, \
                               loop_function=loop_function, \
                               copy_transform=copy_transform)

                self.enc_state.append(enc_state)
                self.argmax_outputs.append(argmax_outputs)
                self.logits.append(logits)

                self.a1s.append(a1s)
                self.kdists.append(kdists)
                self.Ndists.append(Ndists)
                self.Rdebugs.append(Rdebugs)

            params = tf.trainable_variables()
            print(params)

        # saver
        self.saver = tf.train.Saver(var_list=tf.trainable_variables(),
                                    max_to_keep=None,
                                    sharded=True)
Example #9
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                enc_cell,
                                dec_cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                output_projection=None,
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False):
  """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step to use for attention later. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  input_size]). Then it runs attention decoder, initialized with the last
  encoder state, on embedded decoder_inputs and attending to encoder outputs.

  Warning: when output_projection is None, the size of the attention vectors
  and variables will be made proportional to num_decoder_symbols, can be large.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    enc_cell: tf.nn.rnn_cell.RNNCell used for the encoder.
    dec_cell: tf.nn.rnn_cell.RNNCell used for the decoder.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    num_heads: Number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with variable_scope.variable_scope(
      scope or "embedding_attention_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype
    # Encoder.

    encoder_cell = enc_cell

    encoder_cell = core_rnn_cell.EmbeddingWrapper(
        encoder_cell,
        embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    encoder_outputs, encoder_state = rnn.static_rnn(
        encoder_cell, encoder_inputs, dtype=dtype)

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [
        array_ops.reshape(e, [-1, 1, encoder_cell.output_size]) for e in encoder_outputs
    ]
    attention_states = array_ops.concat(top_states, 1)

    # Decoder.
    output_size = None
    if output_projection is None:
      dec_cell = core_rnn_cell.OutputProjectionWrapper(dec_cell, num_decoder_symbols)
      output_size = num_decoder_symbols

    if isinstance(feed_previous, bool):
      return tf.contrib.legacy_seq2seq.embedding_attention_decoder(
          decoder_inputs,
          encoder_state,
          attention_states,
          dec_cell,
          num_decoder_symbols,
          embedding_size,
          num_heads=num_heads,
          output_size=output_size,
          output_projection=output_projection,
          feed_previous=feed_previous,
          initial_state_attention=initial_state_attention)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse):
        outputs, state = tf.contrib.legacy_seq2seq.embedding_attention_decoder(
            decoder_inputs,
            encoder_state,
            attention_states,
            dec_cell,
            num_decoder_symbols,
            embedding_size,
            num_heads=num_heads,
            output_size=output_size,
            output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False,
            initial_state_attention=initial_state_attention)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(
          structure=encoder_state, flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state
Example #10
def embedding_rnn_seq2seq(encoder_inputs,
                          decoder_inputs,
                          cell,
                          num_encoder_symbols,
                          num_decoder_symbols,
                          embedding_size,
                          output_projection=None,
                          feed_previous=False,
                          dtype=None,
                          scope=None):
    """Embedding RNN sequence-to-sequence model.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  input_size]). Then it runs RNN decoder, initialized with the last
  encoder state, on embedded decoder_inputs.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: core_rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial state for both the encoder and decoder
      rnn cells (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_rnn_seq2seq".

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors. The
        output is of shape [batch_size x cell.output_size] when
        output_projection is not None (and represents the dense representation
        of predicted tokens). It is of shape [batch_size x num_decoder_symbols]
        when output_projection is None.
      state: The state of each decoder cell in each time-step. This is a list
        with length len(decoder_inputs) -- one item for each time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].
  """
    with variable_scope.variable_scope(scope
                                       or "embedding_rnn_seq2seq") as scope:
        if dtype is not None:
            scope.set_dtype(dtype)
        else:
            dtype = scope.dtype

        # Encoder.
        encoder_cell = copy.deepcopy(cell)
        encoder_cell = core_rnn_cell.EmbeddingWrapper(
            encoder_cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        _, encoder_state = core_rnn.static_rnn(encoder_cell,
                                               encoder_inputs,
                                               dtype=dtype)

        # Decoder.
        if output_projection is None:
            cell = core_rnn_cell.OutputProjectionWrapper(
                cell, num_decoder_symbols)

        if isinstance(feed_previous, bool):
            return embedding_rnn_decoder(decoder_inputs,
                                         encoder_state,
                                         cell,
                                         num_decoder_symbols,
                                         embedding_size,
                                         output_projection=output_projection,
                                         feed_previous=feed_previous)

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse) as scope:
                outputs, state = embedding_rnn_decoder(
                    decoder_inputs,
                    encoder_state,
                    cell,
                    num_decoder_symbols,
                    embedding_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False)
                state_list = [state]
                if nest.is_sequence(state):
                    state_list = nest.flatten(state)
                return outputs + state_list

        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        outputs_len = len(
            decoder_inputs)  # Outputs length same as decoder inputs.
        state_list = outputs_and_state[outputs_len:]
        state = state_list[0]
        if nest.is_sequence(encoder_state):
            state = nest.pack_sequence_as(structure=encoder_state,
                                          flat_sequence=state_list)
        return outputs_and_state[:outputs_len], state
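
As the docstring notes, feed_previous may also be a scalar Boolean Tensor, in which case both decoder graphs are built and one is selected at run time via cond. A usage sketch (TensorFlow 1.x graph mode assumed; sizes are illustrative):

import tensorflow as tf

encoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(10)]
decoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(12)]
cell = tf.nn.rnn_cell.GRUCell(128)

# A scalar bool placeholder lets one graph serve both training
# (feed_previous=False) and greedy decoding (True), chosen per session run.
feed_previous = tf.placeholder(tf.bool, shape=[], name='feed_previous')

outputs, state = embedding_rnn_seq2seq(
    encoder_inputs, decoder_inputs, cell,
    num_encoder_symbols=10000, num_decoder_symbols=10000,
    embedding_size=128, feed_previous=feed_previous)
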
Example #11
def one2many_rnn_seq2seq(encoder_inputs, decoder_inputs_dict, cell,
                         num_encoder_symbols, num_decoder_symbols_dict,
                         embedding_size, feed_previous=False,
                         dtype=dtypes.float32, scope=None):
  """One-to-many RNN sequence-to-sequence model (multi-task).

  This is a multi-task sequence-to-sequence model with one encoder and multiple
  decoders. Reference to multi-task sequence-to-sequence learning can be found
  here: http://arxiv.org/abs/1511.06114

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs_dict: A dictionary mapping decoder name (string) to
      the corresponding decoder_inputs; each decoder_inputs is a list of 1D
      Tensors of shape [batch_size]; num_decoders is defined as
      len(decoder_inputs_dict).
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols_dict: A dictionary mapping decoder name (string) to an
      integer specifying number of symbols for the corresponding decoder;
      len(num_decoder_symbols_dict) must be equal to num_decoders.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first of
      decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial state for both the encoder and decoder
      rnn cells (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "one2many_rnn_seq2seq".

  Returns:
    A tuple of the form (outputs_dict, state_dict), where:
      outputs_dict: A mapping from decoder name (string) to a list of the same
        length as decoder_inputs_dict[name]; each element in the list is a 2D
        Tensors with shape [batch_size x num_decoder_symbol_list[name]]
        containing the generated outputs.
      state_dict: A mapping from decoder name (string) to the final state of the
        corresponding decoder RNN; it is a 2D Tensor of shape
        [batch_size x cell.state_size].
  """
  outputs_dict = {}
  state_dict = {}

  with variable_scope.variable_scope(scope or "one2many_rnn_seq2seq"):
    # Encoder.
    encoder_cell = rnn_cell.EmbeddingWrapper(
        cell, embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    _, encoder_state = rnn.rnn(encoder_cell, encoder_inputs, dtype=dtype)

    # Decoder.
    for name, decoder_inputs in decoder_inputs_dict.items():
      num_decoder_symbols = num_decoder_symbols_dict[name]

      with variable_scope.variable_scope("one2many_decoder_" + str(name)):
        decoder_cell = rnn_cell.OutputProjectionWrapper(cell,
                                                        num_decoder_symbols)
        if isinstance(feed_previous, bool):
          outputs, state = embedding_rnn_decoder(
              decoder_inputs, encoder_state, decoder_cell, num_decoder_symbols,
              embedding_size, feed_previous=feed_previous)
        else:
          # If feed_previous is a Tensor, we construct 2 graphs and use cond.
          def filled_embedding_rnn_decoder(feed_previous):
            # pylint: disable=cell-var-from-loop
            reuse = None if feed_previous else True
            vs = variable_scope.get_variable_scope()
            with variable_scope.variable_scope(vs, reuse=reuse):
              outputs, state = embedding_rnn_decoder(
                  decoder_inputs, encoder_state, decoder_cell,
                  num_decoder_symbols, embedding_size,
                  feed_previous=feed_previous)
            # pylint: enable=cell-var-from-loop
            return outputs + [state]
          outputs_and_state = control_flow_ops.cond(
              feed_previous,
              lambda: filled_embedding_rnn_decoder(True),
              lambda: filled_embedding_rnn_decoder(False))
          outputs = outputs_and_state[:-1]
          state = outputs_and_state[-1]

      outputs_dict[name] = outputs
      state_dict[name] = state

  return outputs_dict, state_dict
Example #12
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                cell, dec_cell, bi_lstm, attent, beam_search,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                output_projection=None,
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False):
  """Embedding sequence-to-sequence model with attention.
  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step to use for attention later. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  input_size]). Then it runs attention decoder, initialized with the last
  encoder state, on embedded decoder_inputs and attending to encoder outputs.
  Warning: when output_projection is None, the size of the attention vectors
  and variables will be made proportional to num_decoder_symbols, can be large.
  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell: core_rnn_cell.RNNCell used for the encoder.
    dec_cell: core_rnn_cell.RNNCell used for the decoder.
    bi_lstm: Boolean; if True, a bidirectional LSTM encoder is used and the
      forward and backward final states are concatenated.
    attent: Flag forwarded to embedding_attention_decoder.
    beam_search: Flag forwarded to embedding_attention_decoder.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    num_heads: Number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.
  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with variable_scope.variable_scope(
      scope or "embedding_attention_seq2seq", dtype=dtype) as scope:
    dtype = scope.dtype

    # Encoder.
    
    if bi_lstm:
        encoder_fw_cell = copy.deepcopy(cell)
        encoder_fw_cell = core_rnn_cell.EmbeddingWrapper(
            encoder_fw_cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)

        encoder_bw_cell = copy.deepcopy(cell)
        encoder_bw_cell = core_rnn_cell.EmbeddingWrapper(
            encoder_bw_cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)

        encoder_outputs, output_state_fw, output_state_bw = \
            tf.contrib.rnn.static_bidirectional_rnn(
                encoder_fw_cell, encoder_bw_cell, encoder_inputs, dtype=dtype)

        top_states = [
            array_ops.reshape(e, [-1, 1, cell.output_size * 2])
            for e in encoder_outputs
        ]
        attention_states = array_ops.concat(top_states, 1)

        encoder_final_state_c = tf.concat(
            (output_state_fw.c, output_state_bw.c), 1)
        encoder_final_state_h = tf.concat(
            (output_state_fw.h, output_state_bw.h), 1)
        encoder_state = LSTMStateTuple(
            c=encoder_final_state_c,
            h=encoder_final_state_h)
    else:
        encoder_cell = copy.deepcopy(cell)
        encoder_cell = core_rnn_cell.EmbeddingWrapper(
            encoder_cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        encoder_outputs, encoder_state = core_rnn.static_rnn(
            encoder_cell, encoder_inputs, dtype=dtype)
        top_states = [
            array_ops.reshape(e, [-1, 1, cell.output_size])
            for e in encoder_outputs
        ]
        attention_states = array_ops.concat(top_states, 1)

    # Decoder.
    output_size = None
    if output_projection is None:
      # make new decoder cell  
      cell = core_rnn_cell.OutputProjectionWrapper(dec_cell, num_decoder_symbols)
      output_size = num_decoder_symbols
    #if isinstance(feed_previous, bool):
    return embedding_attention_decoder(
          decoder_inputs,
          encoder_state,
          attention_states,
          cell,dec_cell,attent,beam_search,
          num_decoder_symbols,
          embedding_size,
          num_heads=num_heads,
          output_size=output_size,
          output_projection=output_projection,
          feed_previous=feed_previous,
          initial_state_attention=initial_state_attention)
Example #13
def embedding_attention_sampled_seq2seq(encoder_inputs,
                                        decoder_inputs,
                                        cell,
                                        num_encoder_symbols,
                                        num_decoder_symbols,
                                        embedding_size,
                                        num_heads=1,
                                        output_projection=None,
                                        feed_previous=False,
                                        dtype=None,
                                        scope=None,
                                        initial_state_attention=False):
    with variable_scope.variable_scope(scope or "embedding_attention_seq2seq",
                                       dtype=dtype) as scope:
        dtype = scope.dtype
        # Encoder.
        encoder_cell = core_rnn_cell.EmbeddingWrapper(
            cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        encoder_outputs, encoder_state = core_rnn.static_rnn(encoder_cell,
                                                             encoder_inputs,
                                                             dtype=dtype)

        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [
            array_ops.reshape(e, [-1, 1, cell.output_size])
            for e in encoder_outputs
        ]
        attention_states = array_ops.concat(top_states, 1)

        # Decoder.
        output_size = None
        if output_projection is None:
            cell = core_rnn_cell.OutputProjectionWrapper(
                cell, num_decoder_symbols)
            output_size = num_decoder_symbols

        if isinstance(feed_previous, bool):
            return embedding_attention_decoder(
                decoder_inputs,
                encoder_state,
                attention_states,
                cell,
                num_decoder_symbols,
                embedding_size,
                num_heads=num_heads,
                output_size=output_size,
                output_projection=output_projection,
                feed_previous=feed_previous,
                initial_state_attention=initial_state_attention)

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse) as scope:
                outputs, state = embedding_attention_decoder(
                    decoder_inputs,
                    encoder_state,
                    attention_states,
                    cell,
                    num_decoder_symbols,
                    embedding_size,
                    num_heads=num_heads,
                    output_size=output_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False,
                    initial_state_attention=initial_state_attention)
                state_list = [state]
                if nest.is_sequence(state):
                    state_list = nest.flatten(state)
                return outputs + state_list

        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        outputs_len = len(
            decoder_inputs)  # Outputs length same as decoder inputs.
        state_list = outputs_and_state[outputs_len:]
        state = state_list[0]
        if nest.is_sequence(encoder_state):
            state = nest.pack_sequence_as(structure=encoder_state,
                                          flat_sequence=state_list)
        return outputs_and_state[:outputs_len], state
Example #14
def embedding_attention_seq2seq(encoder_inputs, decoder_inputs, cell_1,cell_2,
                                num_encoder_symbols, num_decoder_symbols,
                                embedding_size,
                                num_heads=1, output_projection=None,
                                feed_previous=False, dtype=dtypes.float32,
                                scope=None, initial_state_attention=False, beam_search=True, beam_size=10):
  """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step to use for attention later. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  input_size]). Then it runs attention decoder, initialized with the last
  encoder state, on embedded decoder_inputs and attending to encoder outputs.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    cell_1: rnn_cell.RNNCell used for the encoder.
    cell_2: rnn_cell.RNNCell used for the decoder.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    num_heads: Number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.
    beam_search: Boolean; whether the decoder should use beam search.
    beam_size: Integer; the beam width used when beam_search is True.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with variable_scope.variable_scope(scope or "embedding_attention_seq2seq"):
    # Encoder.
    encoder_cell = rnn_cell.EmbeddingWrapper(
        cell_1, embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)  # reuse=tf.get_variable_scope().reuse
    encoder_outputs, encoder_state = core_rnn.static_rnn(
        encoder_cell, encoder_inputs,
        #scope='embedding_attention_decoder/attention_decoder',
        dtype=dtype)
    print('####### embedding_attention_seq2seq scope: {}'.format(encoder_cell))
    print("Symbols")
    print(num_encoder_symbols)
    print(num_decoder_symbols)
    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [array_ops.reshape(e, [-1, 1, cell_1.output_size])
                  for e in encoder_outputs]
    attention_states = array_ops.concat(axis=1, values=top_states)
    print(attention_states)

    # Decoder.
    output_size = None
    if output_projection is None:
      cell_2 = rnn_cell.OutputProjectionWrapper(cell_2, num_decoder_symbols)
      output_size = num_decoder_symbols
    return embedding_attention_decoder(
          decoder_inputs, encoder_state, attention_states, cell_2,
          num_decoder_symbols, embedding_size, num_heads=num_heads,
          output_size=output_size, output_projection=output_projection,
          feed_previous=feed_previous,
          initial_state_attention=initial_state_attention, beam_search=beam_search, beam_size=beam_size)
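
Because this variant takes separate encoder and decoder cells (cell_1, cell_2), no deep copy of a shared cell is needed. A usage sketch (TensorFlow 1.x graph mode assumed; sizes are illustrative and the modified embedding_attention_decoder with beam search must be defined in the same module):

import tensorflow as tf

encoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(20)]
decoder_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(25)]
cell_1 = tf.nn.rnn_cell.GRUCell(512)  # encoder cell
cell_2 = tf.nn.rnn_cell.GRUCell(512)  # decoder cell

outputs, state = embedding_attention_seq2seq(
    encoder_inputs, decoder_inputs, cell_1, cell_2,
    num_encoder_symbols=40000, num_decoder_symbols=40000,
    embedding_size=512, feed_previous=True,
    beam_search=True, beam_size=10)
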
Example #15
def embedding_rnn_seq2seq(encoder_inputs,
                          decoder_inputs,
                          cell,
                          num_encoder_symbols,
                          num_decoder_symbols,
                          embedding_size,
                          output_projection=None,
                          feed_previous=False,
                          dtype=None,
                          scope=None):

  with variable_scope.variable_scope(scope or "embedding_rnn_seq2seq") as scope:
    if dtype is not None:
      scope.set_dtype(dtype)
    else:
      dtype = scope.dtype

    # Encoder.
    encoder_cell = core_rnn_cell.EmbeddingWrapper(
        cell, embedding_classes=num_encoder_symbols,
        embedding_size=embedding_size)
    _, encoder_state = rnn.static_rnn(encoder_cell, encoder_inputs, dtype=dtype)

    # Decoder.
    if output_projection is None:
      cell = core_rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)

    if isinstance(feed_previous, bool):
      return embedding_rnn_decoder(
          decoder_inputs,
          encoder_state,
          cell,
          num_decoder_symbols,
          embedding_size,
          output_projection=output_projection,
          feed_previous=feed_previous,
          scope=scope)

    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
    def decoder(feed_previous_bool):
      reuse = None if feed_previous_bool else True
      with variable_scope.variable_scope(
          variable_scope.get_variable_scope(), reuse=reuse) as scope:
        outputs, state = embedding_rnn_decoder(
            decoder_inputs, encoder_state, cell, num_decoder_symbols,
            embedding_size, output_projection=output_projection,
            feed_previous=feed_previous_bool,
            update_embedding_for_previous=False)
        state_list = [state]
        if nest.is_sequence(state):
          state_list = nest.flatten(state)
        return outputs + state_list

    outputs_and_state = control_flow_ops.cond(feed_previous,
                                              lambda: decoder(True),
                                              lambda: decoder(False))
    outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
    state_list = outputs_and_state[outputs_len:]
    state = state_list[0]
    if nest.is_sequence(encoder_state):
      state = nest.pack_sequence_as(structure=encoder_state,
                                    flat_sequence=state_list)
    return outputs_and_state[:outputs_len], state