def embedding_rnn_seq2seq(encoder_inputs,
                          decoder_inputs,
                          cell,
                          num_encoder_symbols,
                          num_decoder_symbols,
                          embedding_size,
                          output_projection=None,
                          feed_previous=False,
                          dtype=None,
                          scope=None):
    with variable_scope.variable_scope(scope or "embedding_rnn_seq2seq") as scope:
        if dtype is not None:
            scope.set_dtype(dtype)
        else:
            dtype = scope.dtype
        # Encoder
        encoder_cell = rnn.EmbeddingWrapper(cell, embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        _, encoder_state = rnn.static_rnn(encoder_cell, encoder_inputs, dtype=dtype)
        # Decoder
        if output_projection is None:
            cell = rnn.OutputProjectionWrapper(cell, num_decoder_symbols)
        if isinstance(feed_previous, bool):
            return embedding_rnn_decoder(decoder_inputs,
                encoder_state,
                cell,
                num_decoder_symbols,
                embedding_size,
                output_projection=output_projection,
                feed_previous=feed_previous)
        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse):
                outputs, state = embedding_rnn_decoder(decoder_inputs, encoder_state, cell, num_decoder_symbols,
                    embedding_size, output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False)
                state_list = [state]
                if nest.is_sequence(state):
                    state_list = nest.flatten(state)
                return outputs + state_list
        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        outputs_len = len(decoder_inputs)    # Outputs length same as decoder inputs.
        state_list = outputs_and_state[outputs_len:]
        state = state_list[0]
        if nest.is_sequence(encoder_state):
            state = nest.pack_sequence_as(structure=encoder_state, flat_sequence=state_list)
        return outputs_and_state[:outputs_len], state
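A minimal usage sketch, not from the original source: it assumes TensorFlow 1.x graph mode with "import tensorflow as tf", plus the helpers already imported above (rnn, variable_scope, embedding_rnn_decoder). All sizes below are illustrative placeholders.

# Hypothetical sizes, for illustration only.
seq_len, src_vocab, tgt_vocab, hidden = 10, 1000, 1200, 128

enc_inputs = [tf.placeholder(tf.int32, [None], name="enc%d" % i)
              for i in range(seq_len)]
dec_inputs = [tf.placeholder(tf.int32, [None], name="dec%d" % i)
              for i in range(seq_len)]
lstm = tf.nn.rnn_cell.BasicLSTMCell(hidden)

outputs, state = embedding_rnn_seq2seq(
    enc_inputs, dec_inputs, lstm,
    num_encoder_symbols=src_vocab,
    num_decoder_symbols=tgt_vocab,
    embedding_size=64,
    feed_previous=False)  # True (or a scalar bool Tensor) for greedy decoding
# outputs: list of seq_len Tensors, each of shape [batch_size, tgt_vocab]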
Example #2
 def testEmbeddingWrapperWithDynamicRnn(self):
     with self.test_session() as sess:
         with variable_scope.variable_scope("root"):
             inputs = ops.convert_to_tensor([[[0], [0]]],
                                            dtype=dtypes.int64)
             input_lengths = ops.convert_to_tensor([2], dtype=dtypes.int64)
             embedding_cell = contrib_rnn.EmbeddingWrapper(
                 rnn_cell_impl.BasicLSTMCell(1, state_is_tuple=True),
                 embedding_classes=1,
                 embedding_size=2)
             outputs, _ = rnn.dynamic_rnn(cell=embedding_cell,
                                          inputs=inputs,
                                          sequence_length=input_lengths,
                                          dtype=dtypes.float32)
             sess.run([variables_lib.global_variables_initializer()])
             # This will fail if output's dtype is inferred from input's.
             sess.run(outputs)
Example #3
 def testEmbeddingWrapper(self):
   with self.test_session() as sess:
     with variable_scope.variable_scope(
         "root", initializer=init_ops.constant_initializer(0.5)):
       x = array_ops.zeros([1, 1], dtype=dtypes.int32)
       m = array_ops.zeros([1, 2])
       embedding_cell = contrib_rnn.EmbeddingWrapper(
           rnn_cell_impl.GRUCell(2), embedding_classes=3, embedding_size=2)
       self.assertEqual(embedding_cell.output_size, 2)
       g, new_m = embedding_cell(x, m)
       sess.run([variables_lib.global_variables_initializer()])
       res = sess.run(
           [g, new_m],
           {x.name: np.array([[1]]),
            m.name: np.array([[0.1, 0.1]])})
       self.assertEqual(res[1].shape, (1, 2))
       # The numbers in results were not calculated, this is just a smoke test.
       self.assertAllClose(res[0], [[0.17139, 0.17139]])
Example #4
 def create_cell_scopes(self):
     # self.enc_cells_text = rnn_cell.EmbeddingWrapper(self.cell_type(self.cell_size), self.decoder_words,self.text_embedding_size)
     self.enc_cells_text = rnn.EmbeddingWrapper(
         self.cell_type(self.cell_size), self.decoder_words,
         self.text_embedding_size)
     self.enc_scope_text = "encoder_text"
     max_val = np.sqrt(6. /
                       (self.image_rep_size + self.image_embedding_size))
     self.W_enc_img = tf.Variable(
         tf.random_uniform([self.image_rep_size, self.image_embedding_size],
                           -1. * max_val, max_val),
         name="W_enc_img")
     self.b_enc_img = tf.Variable(tf.constant(
         0., shape=[self.image_embedding_size]),
                                  name="b_enc_img")
     self.enc_scope_img = "encoder_img"
     self.enc_cells_utter = self.cell_type(self.cell_size)
     self.enc_scope_utter = "encoder_utter"
     if self.task_type == "text":
         self.dec_cells_text = self.cell_type(self.cell_size)
         self.dec_scope_text = "decoder_text"
     if self.task_type == "image":
         self.tgt_scope_img = "target_encoder_img"
         self.W_enc_tgt_img = tf.Variable(tf.random_uniform(
             [self.image_rep_size, self.image_embedding_size],
             -1. * max_val, max_val),
                                          name="W_enc_tgt_img")
         self.b_enc_tgt_img = tf.Variable(tf.constant(
             0., shape=[self.image_embedding_size]),
                                          name="b_enc_tgt_img")
         max_val = np.sqrt(6. /
                           (self.cell_size + self.image_embedding_size))
         self.proj_scope_utter = "proj_utter"
         self.W_proj_utter = tf.Variable(
             tf.random_uniform([self.cell_size, self.image_embedding_size],
                               -1. * max_val, max_val),
             name="W_proj_utter")
         self.b_proj_utter = tf.Variable(tf.constant(
             0., shape=[self.image_embedding_size]),
                                         name="b_proj_utter")
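The max_val used above is the Glorot (Xavier) uniform bound sqrt(6 / (fan_in + fan_out)). A rough equivalent for W_enc_img, assuming a TF 1.x version where tf.glorot_uniform_initializer is available, would be:

# Equivalent Glorot-uniform initialization for W_enc_img (illustrative sketch only).
self.W_enc_img = tf.get_variable(
    "W_enc_img",
    shape=[self.image_rep_size, self.image_embedding_size],
    initializer=tf.glorot_uniform_initializer())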
def embedding_attention_encoder(encoder_inputs,
                                cell,
                                num_encoder_symbols,
                                embedding_size,
                                dtype=None,
                                scope=None):
    """Embedding sequence-to-sequence model with attention.

    This model first embeds encoder_inputs by a newly created embedding (of shape
    [num_encoder_symbols x input_size]). Then it runs an RNN to encode
    embedded encoder_inputs into a state vector. It keeps the outputs of this
    RNN at every step to use for attention later. Next, it embeds decoder_inputs
    by another newly created embedding (of shape [num_decoder_symbols x
    input_size]). Then it runs attention decoder, initialized with the last
    encoder state, on embedded decoder_inputs and attending to encoder outputs.

    Warning: when output_projection is None, the size of the attention vectors
    and variables will be made proportional to num_decoder_symbols, can be large.

    Args:
      encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
      decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
      cell: rnn_cell.RNNCell defining the cell function and size.
      num_encoder_symbols: Integer; number of symbols on the encoder side.
      num_decoder_symbols: Integer; number of symbols on the decoder side.
      embedding_size: Integer, the length of the embedding vector for each symbol.
      num_heads: Number of attention heads that read from attention_states.
      output_projection: None or a pair (W, B) of output projection weights and
        biases; W has shape [output_size x num_decoder_symbols] and B has
        shape [num_decoder_symbols]; if provided and feed_previous=True, each
        fed previous output will first be multiplied by W and added B.
      feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
        of decoder_inputs will be used (the "GO" symbol), and all other decoder
        inputs will be taken from previous outputs (as in embedding_rnn_decoder).
        If False, decoder_inputs are used as given (the standard decoder case).
      dtype: The dtype of the initial RNN state (default: tf.float32).
      scope: VariableScope for the created subgraph; defaults to
        "embedding_attention_seq2seq".
      initial_state_attention: If False (default), initial attentions are zero.
        If True, initialize the attentions from the initial state and attention
        states.

    Returns:
      A tuple of the form (outputs, state), where:
        outputs: A list of the same length as decoder_inputs of 2D Tensors with
          shape [batch_size x num_decoder_symbols] containing the generated
          outputs.
        state: The state of each decoder cell at the final time-step.
          It is a 2D Tensor of shape [batch_size x cell.state_size].
    """
    with variable_scope.variable_scope(
                    scope or "embedding_attention_encoder", dtype=dtype) as scope:
        dtype = scope.dtype
        # Encoder.
        encoder_cell = rnn.EmbeddingWrapper(
            cell, embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        encoder_outputs, encoder_state = rnn.static_rnn(
            encoder_cell, encoder_inputs, dtype=dtype)

        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [array_ops.reshape(e, [-1, 1, cell.output_size])
                      for e in encoder_outputs]
        attention_states = array_ops.concat(top_states, 1)

        return encoder_state, attention_states
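A usage sketch for the encoder-only helper, not from the original source; TF 1.x and the imports above are assumed, and the sequence length, vocabulary size, and cell size are placeholders.

# Illustrative only: encode token-id sequences and keep the attention states
# for a separately built attention decoder.
enc_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(20)]
gru = tf.nn.rnn_cell.GRUCell(256)
encoder_state, attention_states = embedding_attention_encoder(
    enc_inputs, gru, num_encoder_symbols=5000, embedding_size=128)
# attention_states has shape [batch_size, 20, 256].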
def one2many_rnn_seq2seq(encoder_inputs,
                         decoder_inputs_dict,
                         cell,
                         num_encoder_symbols,
                         num_decoder_symbols_dict,
                         embedding_size,
                         feed_previous=False,
                         dtype=None,
                         scope=None):
    outputs_dict = {}
    state_dict = {}

    with variable_scope.variable_scope(
            scope or "one2many_rnn_seq2seq", dtype=dtype) as scope:
        dtype = scope.dtype

        # Encoder.
        encoder_cell = rnn.EmbeddingWrapper(
            cell, embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        _, encoder_state = rnn.static_rnn(encoder_cell, encoder_inputs, dtype=dtype)

        # Decoder.
        for name, decoder_inputs in decoder_inputs_dict.items():
            num_decoder_symbols = num_decoder_symbols_dict[name]

            with variable_scope.variable_scope("one2many_decoder_" + str(name)) as scope:
                decoder_cell = rnn.OutputProjectionWrapper(cell, num_decoder_symbols)
                if isinstance(feed_previous, bool):
                    outputs, state = embedding_rnn_decoder(
                        decoder_inputs, encoder_state, decoder_cell, num_decoder_symbols,
                        embedding_size, feed_previous=feed_previous)
                else:
                    # If feed_previous is a Tensor, we construct 2 graphs and use cond.
                    def filled_embedding_rnn_decoder(feed_previous):
                        """The current decoder with a fixed feed_previous parameter."""
                        # pylint: disable=cell-var-from-loop
                        reuse = None if feed_previous else True
                        vs = variable_scope.get_variable_scope()
                        with variable_scope.variable_scope(vs, reuse=reuse):
                            outputs, state = embedding_rnn_decoder(
                                decoder_inputs, encoder_state, decoder_cell,
                                num_decoder_symbols, embedding_size,
                                feed_previous=feed_previous)
                        # pylint: enable=cell-var-from-loop
                        state_list = [state]
                        if nest.is_sequence(state):
                            state_list = nest.flatten(state)
                        return outputs + state_list

                    outputs_and_state = control_flow_ops.cond(
                        feed_previous,
                        lambda: filled_embedding_rnn_decoder(True),
                        lambda: filled_embedding_rnn_decoder(False))
                    # Outputs length is the same as for decoder inputs.
                    outputs_len = len(decoder_inputs)
                    outputs = outputs_and_state[:outputs_len]
                    state_list = outputs_and_state[outputs_len:]
                    state = state_list[0]
                    if nest.is_sequence(encoder_state):
                        state = nest.pack_sequence_as(structure=encoder_state,
                                                      flat_sequence=state_list)
            outputs_dict[name] = outputs
            state_dict[name] = state

    return outputs_dict, state_dict
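A usage sketch for the one-to-many variant, not from the original source; TF 1.x is assumed and all names and sizes are placeholders. One encoder feeds two task-specific decoders that share the encoder state.

enc_inputs = [tf.placeholder(tf.int32, [None]) for _ in range(15)]
dec_inputs_dict = {
    "task_a": [tf.placeholder(tf.int32, [None]) for _ in range(15)],
    "task_b": [tf.placeholder(tf.int32, [None]) for _ in range(15)],
}
num_symbols_dict = {"task_a": 8000, "task_b": 4000}
cell = tf.nn.rnn_cell.GRUCell(128)

outputs_dict, state_dict = one2many_rnn_seq2seq(
    enc_inputs, dec_inputs_dict, cell,
    num_encoder_symbols=10000,
    num_decoder_symbols_dict=num_symbols_dict,
    embedding_size=64,
    feed_previous=False)
# outputs_dict["task_a"] is a list of 15 Tensors of shape [batch_size, 8000].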
def embedding_attention_seq2seq(encoder_inputs,  # [T, batch_size]
                                decoder_inputs,  # [T, batch_size]
                                cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,     # number of attention heads
                                output_projection=None,  # decoder output projection (W, B)
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False,
                                loop_fn_factory=_extract_argmax_and_embed):
    """
    :param encoder_inputs: encoder的输入,int32型 id tensor list
    :param decoder_inputs: decoder的输入,int32型id tensor list
    :param cell: RNN_Cell的实例
    :param num_encoder_symbols: 编码的符号数,即词表大小
    :param num_decoder_symbols: 解码的符号数,即词表大小
    :param embedding_size: 词向量的维度
    :param num_heads: attention的抽头数量,一个抽头算一种加权求和方式
    :param output_projection: decoder的output向量投影到词表空间时,用到的投影矩阵和偏置项(W, B);W的shape是[output_size, num_decoder_symbols],B的shape是[num_decoder_symbols];若此参数存在且feed_previous=True,上一个decoder的输出先乘W再加上B作为下一个decoder的输入
    :param feed_previous: 若为True, 只有第一个decoder的输入(“GO"符号)有用,所有的decoder输入都依赖于上一步的输出;一般在测试时用
    :param dtype:
    :param scope:
    :param initial_state_attention: 默认为False, 初始的attention是零;若为True,将从initial state和attention states开始attention
    :param loop_fn_factory:
    :return:
    """
    with variable_scope.variable_scope(scope or "embedding_attention_seq2seq", dtype=dtype) as scope:
        dtype = scope.dtype
        # Encoder.
        # Create an embedding matrix,
        # compute the encoder outputs and state,
        # and build the attention states used to compute attention.
        # EmbeddingWrapper puts an embedding layer in front of the RNNCell, so the
        # resulting encoder_cell can take word ids directly as input.
        encoder_cell = rnn.EmbeddingWrapper(
            cell, embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        encoder_outputs, encoder_state = rnn.static_rnn(
            encoder_cell, encoder_inputs, dtype=dtype)  #  [T,batch_size,size]

        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [array_ops.reshape(e, [-1, 1, cell.output_size])
                      for e in encoder_outputs]  # T * [batch_size, 1, size]
        attention_states = array_ops.concat(top_states, 1)  # [batch_size,T,size]

        # Decoder.
        # Build the decoder cell by wrapping the given cell with OutputProjectionWrapper.
        output_size = None
        if output_projection is None:
            cell = rnn.OutputProjectionWrapper(cell, num_decoder_symbols)  # projects the output to the desired dimensionality
            output_size = num_decoder_symbols
        if isinstance(feed_previous, bool):
            return embedding_attention_decoder(
                decoder_inputs,
                encoder_state,
                attention_states,
                cell,
                num_decoder_symbols,
                embedding_size,
                num_heads=num_heads,
                output_size=output_size,
                output_projection=output_projection,
                feed_previous=feed_previous,
                initial_state_attention=initial_state_attention,
                loop_fn_factory=loop_fn_factory)

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse):
                outputs, state = embedding_attention_decoder(
                    decoder_inputs,
                    encoder_state,
                    attention_states,
                    cell,
                    num_decoder_symbols,
                    embedding_size,
                    num_heads=num_heads,
                    output_size=output_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False,
                    initial_state_attention=initial_state_attention,
                    loop_fn_factory=loop_fn_factory)
                state_list = [state]
                if nest.is_sequence(state):
                    state_list = nest.flatten(state)
                return outputs + state_list

        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
        state_list = outputs_and_state[outputs_len:]
        state = state_list[0]
        if nest.is_sequence(encoder_state):
            state = nest.pack_sequence_as(structure=encoder_state,
                                          flat_sequence=state_list)
        return outputs_and_state[:outputs_len], state
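Passing feed_previous as a scalar bool Tensor, as handled by the cond branch above, lets one graph serve both teacher forcing and greedy decoding. A sketch, not from the original source, reusing enc_inputs, dec_inputs and cell built as in the earlier examples:

feed_prev = tf.placeholder(tf.bool, [], name="feed_previous")
outputs, state = embedding_attention_seq2seq(
    enc_inputs, dec_inputs, cell,
    num_encoder_symbols=10000, num_decoder_symbols=10000,
    embedding_size=128, feed_previous=feed_prev)
# sess.run(outputs, {..., feed_prev: False})  # training: teacher forcing
# sess.run(outputs, {..., feed_prev: True})   # inference: feed previous outputs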
Example #8
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                enc_cell,
                                dec_cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                output_projection=None,
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False):
    """Embedding sequence-to-sequence model with attention.

  This model first embeds encoder_inputs by a newly created embedding (of shape
  [num_encoder_symbols x input_size]). Then it runs an RNN to encode
  embedded encoder_inputs into a state vector. It keeps the outputs of this
  RNN at every step to use for attention later. Next, it embeds decoder_inputs
  by another newly created embedding (of shape [num_decoder_symbols x
  input_size]). Then it runs attention decoder, initialized with the last
  encoder state, on embedded decoder_inputs and attending to encoder outputs.

  Warning: when output_projection is None, the size of the attention vectors
  and variables will be made proportional to num_decoder_symbols, can be large.

  Args:
    encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
    enc_cell: tf.nn.rnn_cell.RNNCell used as the encoder cell.
    dec_cell: tf.nn.rnn_cell.RNNCell used as the decoder cell.
    num_encoder_symbols: Integer; number of symbols on the encoder side.
    num_decoder_symbols: Integer; number of symbols on the decoder side.
    embedding_size: Integer, the length of the embedding vector for each symbol.
    num_heads: Number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and added B.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x num_decoder_symbols] containing the generated
        outputs.
      state: The state of each decoder cell at the final time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].
  """
    with tf.variable_scope(scope or "embedding_attention_seq2seq",
                           dtype=dtype) as scope:
        dtype = scope.dtype
        # Encoder.

        encoder_cell = enc_cell

        encoder_cell = rnn.EmbeddingWrapper(
            encoder_cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        encoder_outputs, encoder_state = rnn.static_rnn(encoder_cell,
                                                        encoder_inputs,
                                                        dtype=dtype)

        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = [
            tf.reshape(e, [-1, 1, encoder_cell.output_size])
            for e in encoder_outputs
        ]
        attention_states = tf.concat(top_states, 1)

        # Decoder.
        output_size = None
        if output_projection is None:
            dec_cell = rnn.OutputProjectionWrapper(dec_cell,
                                                   num_decoder_symbols)
            output_size = num_decoder_symbols

        if isinstance(feed_previous, bool):
            return embedding_attention_decoder(
                decoder_inputs,
                encoder_state,
                attention_states,
                dec_cell,
                num_decoder_symbols,
                embedding_size,
                num_heads=num_heads,
                output_size=output_size,
                output_projection=output_projection,
                feed_previous=feed_previous,
                initial_state_attention=initial_state_attention)

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            reuse = None if feed_previous_bool else True
            with tf.variable_scope(tf.get_variable_scope(), reuse=reuse):
                outputs, state = embedding_attention_decoder(
                    decoder_inputs,
                    encoder_state,
                    attention_states,
                    dec_cell,
                    num_decoder_symbols,
                    embedding_size,
                    num_heads=num_heads,
                    output_size=output_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False,
                    initial_state_attention=initial_state_attention)
                state_list = [state]
                if nest.is_sequence(state):
                    state_list = nest.flatten(state)
                return outputs + state_list

        outputs_and_state = tf.cond(feed_previous, lambda: decoder(True),
                                    lambda: decoder(False))
        outputs_len = len(
            decoder_inputs)  # Outputs length same as decoder inputs.
        state_list = outputs_and_state[outputs_len:]
        state = state_list[0]
        if nest.is_sequence(encoder_state):
            state = nest.pack_sequence_as(structure=encoder_state,
                                          flat_sequence=state_list)
        return outputs_and_state[:outputs_len], state
Example #9
    def __init__(self, config, vocab_size, name_scope, dtype=tf.float32):
        # with tf.variable_scope(name_or_scope=scope_name):
        emb_dim = config.embed_dim
        num_layers = config.num_layers
        # vocab_size = config.vocab_size
        # max_len = config.max_len
        num_class = config.num_class
        buckets = config.buckets
        self.lr = config.lr
        self.global_step = tf.Variable(initial_value=0, trainable=False)

        self.query = []
        self.answer = []
        for i in range(buckets[-1][0]):
            self.query.append(
                tf.placeholder(dtype=tf.int32,
                               shape=[None],
                               name="query{0}".format(i)))
        for i in range(buckets[-1][1]):
            self.answer.append(
                tf.placeholder(dtype=tf.int32,
                               shape=[None],
                               name="answer{0}".format(i)))

        self.target = tf.placeholder(dtype=tf.int64,
                                     shape=[None],
                                     name="target")

        # encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(emb_dim)
        encoder_cell = tf.nn.rnn_cell.LSTMCell(emb_dim)
        encoder_mutil = tf.nn.rnn_cell.MultiRNNCell([encoder_cell] *
                                                    num_layers)
        encoder_emb = rnn.EmbeddingWrapper(encoder_mutil,
                                           embedding_classes=vocab_size,
                                           embedding_size=emb_dim)

        # context_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=emb_dim)
        context_cell = tf.nn.rnn_cell.LSTMCell(num_units=emb_dim)
        context_multi = tf.nn.rnn_cell.MultiRNNCell([context_cell] *
                                                    num_layers)

        self.b_query_state = []
        self.b_answer_state = []
        self.b_state = []
        self.b_logits = []
        self.b_loss = []
        # self.b_cost = []
        self.b_train_op = []
        for i, bucket in enumerate(buckets):
            with tf.variable_scope(name_or_scope="Hier_RNN_encoder",
                                   reuse=True if i > 0 else None) as var_scope:
                query_output, query_state = rnn.static_rnn(
                    encoder_emb,
                    inputs=self.query[:bucket[0]],
                    dtype=tf.float32)
                # output [max_len, batch_size, emb_dim]   state [num_layer, 2, batch_size, emb_dim]
                var_scope.reuse_variables()
                answer_output, answer_state = rnn.static_rnn(
                    encoder_emb,
                    inputs=self.answer[:bucket[1]],
                    dtype=tf.float32)
                self.b_query_state.append(query_state)
                self.b_answer_state.append(answer_state)
                context_input = [query_state[-1][1], answer_state[-1][1]]

            with tf.variable_scope(name_or_scope="Hier_RNN_context",
                                   reuse=True if i > 0 else None):
                output, state = rnn.static_rnn(context_multi,
                                               context_input,
                                               dtype=tf.float32)
                self.b_state.append(state)
                top_state = state[-1][1]  # [batch_size, emb_dim]

            with tf.variable_scope("Softmax_layer_and_output",
                                   reuse=True if i > 0 else None):
                softmax_w = tf.get_variable("softmax_w", [emb_dim, num_class],
                                            dtype=tf.float32)
                softmax_b = tf.get_variable("softmax_b", [num_class],
                                            dtype=tf.float32)
                logits = tf.matmul(top_state, softmax_w) + softmax_b
                self.b_logits.append(logits)

            with tf.name_scope("loss"):
                loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=self.target, logits=logits)
                mean_loss = tf.reduce_mean(loss)
                self.b_loss.append(mean_loss)

            with tf.name_scope("gradient_descent"):
                disc_params = [
                    var for var in tf.trainable_variables()
                    if name_scope in var.name
                ]
                grads, norm = tf.clip_by_global_norm(
                    tf.gradients(mean_loss, disc_params), config.max_grad_norm)
                # optimizer = tf.train.GradientDescentOptimizer(self.lr)
                optimizer = tf.train.AdamOptimizer(self.lr)
                train_op = optimizer.apply_gradients(
                    zip(grads, disc_params), global_step=self.global_step)
                self.b_train_op.append(train_op)

        all_variables = [
            v for v in tf.global_variables() if name_scope in v.name
        ]
        self.saver = tf.train.Saver(all_variables)
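The class name is not shown in the snippet above, so the constructor call below uses a hypothetical placeholder; the config fields simply mirror the attributes the constructor reads. A sketch, not from the original source:

import collections

Config = collections.namedtuple(
    "Config",
    ["embed_dim", "num_layers", "num_class", "buckets", "lr", "max_grad_norm"])
config = Config(embed_dim=128, num_layers=2, num_class=2,
                buckets=[(10, 10), (20, 20)], lr=1e-3, max_grad_norm=5.0)

# HierDiscriminator is a hypothetical name for the class whose __init__ is shown above.
model = HierDiscriminator(config, vocab_size=20000, name_scope="disc")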