    def init_placeholders(self):
        """初始化训练、预测所需的变量
        """

        # Encoder inputs, shape=(batch_size, time_step)
        # batch_size sentences, each a sequence of word indices padded to length time_step
        self.encoder_inputs = tf.placeholder(dtype=tf.int32,
                                             shape=(self.batch_size, None),
                                             name='encoder_inputs')

        # Encoder input lengths, shape=(batch_size,)
        # the actual length of each of the batch_size sentences
        self.encoder_inputs_length = tf.placeholder(
            dtype=tf.int32,
            shape=(self.batch_size, ),
            name='encoder_inputs_length')

        # Encoder inputs, shape=(batch_size, time_step)
        # batch_size sentences, each a sequence of word indices padded to length time_step
        self.x = tf.placeholder(dtype=tf.int32,
                                shape=(self.batch_size, None),
                                name='x')

        # Encoder input lengths, shape=(batch_size,)
        # the actual length of each of the batch_size sentences
        self.xl = tf.placeholder(dtype=tf.int32,
                                 shape=(self.batch_size, ),
                                 name='xl')

        # Encoder embedding
        with tf.device(_get_embed_device(self.input_vocab_size)):
            self.encoder_embeddings = tf.get_variable(
                name='embedding',
                shape=(self.input_vocab_size, self.embedding_size),
                initializer=self.initializer,
            )

        if self.mode == 'train':
            self.targets = tf.placeholder(dtype=tf.int64,
                                          shape=(self.batch_size, ),
                                          name='target')
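A minimal, self-contained sketch (an assumption, not part of the original code) of how placeholders like the ones above are fed with an index-encoded, padded batch in a TF1 session:

import numpy as np
import tensorflow as tf

encoder_inputs = tf.placeholder(tf.int32, shape=(None, None), name='encoder_inputs')
encoder_inputs_length = tf.placeholder(tf.int32, shape=(None,), name='encoder_inputs_length')
total_tokens = tf.reduce_sum(encoder_inputs_length)  # any op that consumes the placeholders

batch = np.array([[4, 7, 9, 0], [5, 6, 0, 0]], dtype=np.int32)  # (batch_size=2, time_step=4), 0 = padding
lengths = np.array([3, 2], dtype=np.int32)                      # true length of each sentence

with tf.Session() as sess:
    print(sess.run(total_tokens,
                   feed_dict={encoder_inputs: batch,
                              encoder_inputs_length: lengths}))  # -> 5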
Example #2
    def build_decoder(self, encoder_outputs, encoder_state):
        with tf.variable_scope('decoder') as decoder_scope:
            (
                self.decoder_cell,
                self.decoder_initial_state
            ) = self.build_decoder_cell(encoder_outputs, encoder_state)

            with tf.device(_get_embed_device(self.target_vocab_size)):
                if self.share_embedding:
                    self.decoder_embeddings = self.encoder_embeddings
                elif self.pretrained_embedding:

                    self.decoder_embeddings = tf.Variable(
                        tf.constant(0.0, shape=(self.target_vocab_size, self.embedding_size)),
                        trainable=True,
                        name='embeddings'
                    )

                    self.decoder_embeddings_placeholder =\
                        tf.placeholder(tf.float32, (self.target_vocab_size,
                                                    self.embedding_size))

                    self.decoder_embeddings_init = self.decoder_embeddings.assign(self.decoder_embeddings_placeholder)
                else:
                    self.decoder_embeddings = tf.get_variable(
                        name='embedding',
                        shape=(self.target_vocab_size, self.embedding_size),
                        initializer=self.initializer,
                        dtype=tf.float32
                    )

            self.decoder_output_projection = layers.Dense(self.target_vocab_size,
                                                          dtype=tf.float32,
                                                          use_bias=False,
                                                          name='decoder_output_projection')

            if self.mode == 'train':
                self.decoder_inputs_embedded = tf.nn.embedding_lookup(
                    params=self.decoder_embeddings,
                    ids=self.decoder_inputs_train
                )

                inputs = self.decoder_inputs_embedded

                if self.time_major:
                    inputs = tf.transpose(inputs, (1, 0, 2))

                training_helper = seq2seq.TrainingHelper(
                    inputs=inputs,
                    sequence_length=self.decoder_inputs_length,
                    time_major=self.time_major,
                    name='training_helper'
                )

                training_decoder = seq2seq.BasicDecoder(
                    cell=self.decoder_cell,
                    helper=training_helper,
                    initial_state=self.decoder_initial_state
                )

                max_decoder_length = tf.reduce_max(
                    self.decoder_inputs_length
                )

                (
                    outputs,
                    self.final_state,
                    _
                ) = seq2seq.dynamic_decode(
                    decoder=training_decoder,
                    output_time_major=self.time_major,
                    impute_finished=True,
                    maximum_iterations=max_decoder_length,
                    parallel_iterations=self.parallel_iterations,
                    swap_memory=True,
                    scope=decoder_scope
                )

                self.decoder_logits_train = self.decoder_output_projection(
                    outputs.rnn_output
                )

                self.masks = tf.sequence_mask(
                    lengths=self.decoder_inputs_length,
                    maxlen=max_decoder_length,
                    dtype=tf.float32,
                    name='masks'
                )

                decoder_logits_train = self.decoder_logits_train
                if self.time_major:
                    decoder_logits_train = tf.transpose(decoder_logits_train, (1, 0, 2))

                self.decoder_pred_train = tf.argmax(
                    decoder_logits_train, axis=-1, name='decoder_pred_train'
                )

                self.train_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=self.decoder_inputs,
                    logits=decoder_logits_train)

                self.masks_rewards = self.masks * self.rewards

                self.loss_rewards = seq2seq.sequence_loss(
                    logits=decoder_logits_train,
                    targets=self.decoder_inputs,
                    weights=self.masks_rewards,
                    average_across_timesteps=True,
                    average_across_batch=True
                )

                self.loss = seq2seq.sequence_loss(
                    logits=decoder_logits_train,
                    targets=self.decoder_inputs,
                    weights=self.masks,
                    average_across_timesteps=True,
                    average_across_batch=True
                )

                self.loss_add = self.loss + self.add_loss

            elif self.mode == 'decode':
                start_token = tf.tile(
                    [WordSequence.START],
                    [self.batch_size]
                )
                end_token = WordSequence.END

                def embed_and_input_proj(inputs):
                    return tf.nn.embedding_lookup(
                        self.decoder_embeddings,
                        inputs
                    )

                if not self.use_beamsearch_decode:
                    decoder_helper = seq2seq.GreedyEmbeddingHelper(
                        start_tokens=start_token,
                        end_token=end_token,
                        embedding=embed_and_input_proj
                    )

                    inference_decoder = seq2seq.BasicDecoder(
                        cell=self.decoder_cell,
                        helper=decoder_helper,
                        initial_state=self.decoder_initial_state,
                        output_layer=self.decoder_output_projection
                    )
                else:
                    inference_decoder = BeamSearchDecoder(
                        cell=self.decoder_cell,
                        embedding=embed_and_input_proj,
                        start_tokens=start_token,
                        end_token=end_token,
                        initial_state=self.decoder_initial_state,
                        beam_width=self.beam_width,
                        output_layer=self.decoder_output_projection
                    )
                if self.max_decode_step is not None:
                    max_decoder_step = self.max_decode_step
                else:
                    max_decoder_step = tf.round(tf.reduce_max(
                        self.encoder_inputs_length
                    ) * 4)
                (
                    self.decoder_outputs_decode,
                    self.final_state,
                    _
                ) = seq2seq.dynamic_decode(
                    decoder=inference_decoder,
                    output_time_major=self.time_major,
                    maximum_iterations=max_decoder_step,
                    parallel_iterations=self.parallel_iterations,
                    swap_memory=True,
                    scope=decoder_scope
                )

                if not self.use_beamsearch_decode:
                    dod = self.decoder_outputs_decode
                    self.decoder_pred_decode = dod.sample_id

                    if self.time_major:
                        self.decoder_pred_decode = tf.transpose(
                            self.decoder_pred_decode, (1, 0)
                        )
                else:
                    self.decoder_pred_decode = self.decoder_outputs_decode.predicted_ids

                    if self.time_major:
                        self.decoder_pred_decode = tf.transpose(
                            self.decoder_pred_decode, (1, 0, 2)
                        )
                    self.decoder_pred_decode = tf.transpose(
                        self.decoder_pred_decode,
                        perm=[0, 2, 1]
                    )
                    dod = self.decoder_outputs_decode
                    self.beam_prob = dod.beam_search_decoder_output.scores
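                # Usage sketch (an assumption, not part of the original code): with a built
                # model instance `model` in 'decode' mode and an open tf.Session `sess`,
                # a prediction run would look roughly like
                #     preds = sess.run(model.decoder_pred_decode,
                #                      feed_dict={model.encoder_inputs: batch,            # (batch_size, time_step) int32
                #                                 model.encoder_inputs_length: lengths})  # (batch_size,) int32
                # Without beam search, preds holds one token-id sequence per batch entry;
                # with beam search (after the transposes above), one sequence per beam per batch entry.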
Example #3
    def build_encoder(self):
        """
        构建编码器
        :return:
        """
        with tf.variable_scope('encoder'):
            encoder_cell = self.build_encoder_cell()

            with tf.device(_get_embed_device(self.input_vocab_size)):

                if self.pretrained_embedding:

                    self.encoder_embeddings = tf.Variable(
                        tf.constant(0.0,
                                    shape=(self.input_vocab_size, self.embedding_size)),
                        trainable=True,
                        name='embeddings')

                    self.encoder_embeddings_placeholder = tf.placeholder(tf.float32,
                                                                         (self.input_vocab_size, self.embedding_size)
                                                                         )

                    self.encoder_embeddings_init = self.encoder_embeddings.assign(self.encoder_embeddings_placeholder)

                else:
                    self.encoder_embeddings = tf.get_variable(name='embeddings',
                                                              shape=(self.input_vocab_size, self.embedding_size),
                                                              initializer=self.initializer,
                                                              dtype=tf.float32)

            self.encoder_inputs_embedded = tf.nn.embedding_lookup(
                params=self.encoder_embeddings,
                ids=self.encoder_inputs)

            if self.use_residual:
                self.encoder_inputs_embedded = layers.dense(self.encoder_inputs_embedded,
                                                            self.hidden_size,
                                                            use_bias=False,
                                                            name='encoder_residual_projection')

            inputs = self.encoder_inputs_embedded
            if self.time_major:
                inputs = tf.transpose(inputs, (1, 0, 2))

            if not self.bidirection:
                (
                    encoder_outputs,
                    encoder_state
                ) = tf.nn.dynamic_rnn(
                                    cell=encoder_cell,
                                    inputs=inputs,
                                    sequence_length=self.encoder_inputs_length,
                                    dtype=tf.float32,
                                    time_major=self.time_major,
                                    parallel_iterations=self.parallel_iterations,
                                    swap_memory=True)
            else:
                # The bidirectional case needs an extra step to merge the two directions
                encoder_cell_bw = self.build_encoder_cell()
                (
                    (encoder_fw_outputs, encoder_bw_outputs),
                    (encoder_fw_state, encoder_bw_state)
                ) = tf.nn.bidirectional_dynamic_rnn(
                    cell_bw=encoder_cell_bw,
                    cell_fw=encoder_cell,
                    inputs=inputs,
                    sequence_length=self.encoder_inputs_length,
                    dtype=tf.float32,
                    time_major=self.time_major,
                    parallel_iterations=self.parallel_iterations,
                    swap_memory=True)

                encoder_outputs = tf.concat(
                    (encoder_bw_outputs, encoder_fw_outputs), 2)

                encoder_state = []
                for i in range(self.depth):
                    encoder_state.append(encoder_fw_state[i])
                    encoder_state.append(encoder_bw_state[i])
                encoder_state = tuple(encoder_state)

        return encoder_outputs, encoder_state
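A usage sketch (an assumption, not part of the original code) for the pretrained-embedding path above: the Variable starts as zeros and is filled once through the assign op, e.g.

# pretrained = np.load('embeddings.npy')  # hypothetical (input_vocab_size, embedding_size) matrix
# sess.run(model.encoder_embeddings_init,
#          feed_dict={model.encoder_embeddings_placeholder: pretrained})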
Example #4
    def build_decoder(self, encoder_outputs, encoder_state):
        """构建解码器
        """
        with tf.variable_scope('decoder') as decoder_scope:
            # Create the decoder cell and its initial state
            (self.decoder_cell, self.decoder_initial_state) = \
                self.build_decoder_cell(encoder_outputs, encoder_state)

            # Decoder embedding; trained on CPU or GPU depending on the vocabulary size
            with tf.device(_get_embed_device(self.target_vocab_size)):
                # If embeddings are shared, reuse the encoder's; otherwise load a
                # pretrained matrix or initialize a new one for training
                if self.share_embedding:
                    self.decoder_embeddings = self.encoder_embeddings
                # Pretrained embedding
                elif self.pretrained_embedding:

                    self.decoder_embeddings = tf.Variable(
                        tf.constant(
                            0.0,
                            shape=(self.target_vocab_size, self.embedding_size)),
                        trainable=True,  # whether the embedding matrix is trainable
                        name='embeddings')
                    self.decoder_embeddings_placeholder = tf.placeholder(
                        tf.float32,
                        (self.target_vocab_size, self.embedding_size))
                    self.decoder_embeddings_init = self.decoder_embeddings.assign(
                            self.decoder_embeddings_placeholder)
                else:
                    self.decoder_embeddings = tf.get_variable(
                        name='embeddings',
                        shape=(self.target_vocab_size, self.embedding_size),
                        initializer=self.initializer,
                        dtype=tf.float32
                    )
            
            # Decoder output projection
            self.decoder_output_projection = layers.Dense(
                self.target_vocab_size,  # one output per vocabulary entry
                dtype=tf.float32,
                use_bias=False,
                name='decoder_output_projection'
            )

            if self.mode == 'train':
                self.decoder_inputs_embedded = tf.nn.embedding_lookup(
                    params=self.decoder_embeddings,
                    ids=self.decoder_inputs_train  # defined when the placeholders are initialized
                )
                inputs = self.decoder_inputs_embedded
                
                if self.time_major:
                    inputs = tf.transpose(inputs, (1, 0, 2))
                
                # Helper that feeds the ground-truth inputs to the decoder during training
                training_helper = seq2seq.TrainingHelper(
                    inputs=inputs,
                    sequence_length=self.decoder_inputs_length,
                    time_major=self.time_major,
                    name='training_helper'
                )

                # Do not apply output_layer here during training: it would run the
                # projection at every time step, which is slow.
                # Note: for this trick to work, the scope argument of dynamic_decode must be set.
                training_decoder = seq2seq.BasicDecoder(
                    cell=self.decoder_cell,
                    helper=training_helper,
                    initial_state=self.decoder_initial_state,  # initialize with the previously built initial state
                )

                # Maximum decoder time_steps in the current batch
                max_decoder_length = tf.reduce_max(self.decoder_inputs_length)
                
                # Run dynamic decoding
                (outputs, self.final_state, _) = seq2seq.dynamic_decode(
                    decoder=training_decoder,
                    output_time_major=self.time_major,  # True: time (seq_length) is the first dim; False: batch_size is
                    impute_finished=True,  # once a sequence is finished, its later outputs are zeroed out
                    maximum_iterations=max_decoder_length,  # max iterations, i.e. the most tokens the decoder may emit
                    parallel_iterations=self.parallel_iterations,  # parallelism of the underlying while_loop
                    swap_memory=True,  # on OOM, allow swapping tensors from GPU memory to host memory
                    scope=decoder_scope)
                # During training, project all outputs through the dense layer in one pass,
                # which is more efficient (as recommended by the official NMT code)
                self.decoder_logits_train = self.decoder_output_projection(
                    outputs.rnn_output  # the decoder outputs defined above
                )

                # masks: masking for valid and padded time steps
                # tf.sequence_mask builds a boolean mask from sequence lengths, e.g.:
                # tf.sequence_mask([1, 2], 4)
                # -->
                # [[ True False False False]
                #  [ True  True False False]]
                # [batch_size, max_time_step + 1]
                self.masks = tf.sequence_mask(
                    lengths=self.decoder_inputs_length,
                    maxlen=max_decoder_length,
                    dtype=tf.float32, 
                    name='masks'
                )

                decoder_logits_train = self.decoder_logits_train
                if self.time_major:
                    decoder_logits_train = tf.transpose(decoder_logits_train,
                                            (1,0,2))
                # Training-time predictions: decoder_logits_train has vocab_size scores per step,
                # so the argmax index is the predicted token id
                self.decoder_pred_train = tf.argmax(
                    decoder_logits_train, 
                    axis=-1,
                    name='decoder_pred_train')

                # The variables below are used for special training schemes
                # (custom rewards; effectively the masks / loss are modified)
                # train_entropy = cross entropy
                self.train_entropy = \
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        labels=self.decoder_inputs,  # labels
                        logits=decoder_logits_train)  # predictions

                self.masks_rewards = self.masks * self.rewards
                
                # seq2seq sequence loss: per-step losses are summed and averaged, weighted
                # by the mask (positions past a sentence's length get weight 0);
                # see the standalone sequence_loss sketch after this example
                self.loss_rewards = seq2seq.sequence_loss(
                    logits=decoder_logits_train,
                    targets=self.decoder_inputs,
                    weights=self.masks_rewards,  # note: different weights than below
                    average_across_timesteps=True,  # divide the loss by the total weight
                    average_across_batch=True,  # and average over the batch
                )

                self.loss = seq2seq.sequence_loss(
                    logits=decoder_logits_train,
                    targets=self.decoder_inputs,
                    weights=self.masks,
                    average_across_timesteps=True,
                    average_across_batch=True,
                )

                self.loss_add = self.loss + self.add_loss

            elif self.mode == 'decode':
                # Inference mode (not training)

                # Tile the start token across the batch; see
                # https://blog.csdn.net/tsyccnh/article/details/82459859
                start_tokens = tf.tile(
                    [WordSequence.START],
                    [self.batch_size]
                )
                end_token = WordSequence.END

                def embed_and_input_proj(inputs):
                    """输入层的投影层wrapper
                    将输入转换成对应词表对应下的embedding
                    """
                    return tf.nn.embedding_lookup(
                        self.decoder_embeddings,
                        inputs
                    )
                
                # Without beam search, define the greedy helper and decoder here
                if not self.use_beamsearch_decode:
                    # Helper to feed inputs for greedy decoding:
                    # uses the argmax of the output
                    # Greedy decoding
                    decoding_helper = seq2seq.GreedyEmbeddingHelper(
                        start_tokens=start_tokens,  # start token
                        end_token=end_token,  # end token
                        embedding=embed_and_input_proj  # maps token ids to their embeddings
                    )
                    # Basic decoder performs greedy decoding at each time step
                    # print("building greedy decoder..")
                    inference_decoder = seq2seq.BasicDecoder(
                        cell=self.decoder_cell,
                        helper=decoding_helper,
                        initial_state=self.decoder_initial_state,
                        output_layer=self.decoder_output_projection
                    )
                else:
                    # Decode with beam search
                    # Beamsearch is used to approximately
                    # find the most likely translation
                    # print("building beamsearch decoder..")
                    inference_decoder = BeamSearchDecoder(
                        cell=self.decoder_cell,
                        embedding=embed_and_input_proj,
                        start_tokens=start_tokens,
                        end_token=end_token,
                        initial_state=self.decoder_initial_state,
                        beam_width=self.beam_width,
                        output_layer=self.decoder_output_projection,
                    )
                
                
                # Usually the configured maximum is used
                if self.max_decode_step is not None:
                    max_decode_step = self.max_decode_step
                else:
                    # By default, decode up to 4x the input length
                    max_decode_step = tf.round(tf.reduce_max(
                        self.encoder_inputs_length) * 4)

                
                (
                    self.decoder_outputs_decode,  # outputs
                    self.final_state,  # final state
                    _  # self.decoder_outputs_length_decode
                ) = seq2seq.dynamic_decode(
                    decoder=inference_decoder,  # carries the chosen decoding strategy
                    output_time_major=self.time_major,
                    # impute_finished=True,	# error occurs
                    maximum_iterations=max_decode_step,
                    parallel_iterations=self.parallel_iterations,
                    swap_memory=True,
                    scope=decoder_scope
                )
                
                # Without beam search, greedy decoding is used.
                # dynamic_decode returns decoder_outputs_decode, a namedtuple with two fields (rnn_output, sample_id):
                # rnn_output: [batch_size, decoder_targets_length, vocab_size], per-step scores for every word, usable for the loss
                # sample_id: [batch_size, decoder_targets_length], tf.int32, the decoded token ids, i.e. the final answer
                
                if not self.use_beamsearch_decode:

                    dod = self.decoder_outputs_decode
                    self.decoder_pred_decode = dod.sample_id  # the final decoded ids

                    if self.time_major:
                        self.decoder_pred_decode = tf.transpose(
                            self.decoder_pred_decode, (1, 0))
                # When beam search is used
                # (see https://blog.csdn.net/liuchonge/article/details/79021938),
                # decoder_outputs_decode contains two fields (predicted_ids, beam_search_decoder_output):
                # predicted_ids: [batch_size, decoder_targets_length, beam_size], the output ids
                # beam_search_decoder_output: BeamSearchDecoderOutput instance, namedtuple(scores, predicted_ids, parent_ids)
                # so returning predicted_ids (or sample_id) is enough to recover the final result
                else:
                    self.decoder_pred_decode = \
                        self.decoder_outputs_decode.predicted_ids

                    if self.time_major:
                        self.decoder_pred_decode = tf.transpose(
                            self.decoder_pred_decode, (1, 0, 2))

                    self.decoder_pred_decode = tf.transpose(
                        self.decoder_pred_decode,
                        perm=[0, 2, 1])
                    dod = self.decoder_outputs_decode
                    self.beam_prob = dod.beam_search_decoder_output.scores
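A small self-contained sketch (an assumption, not part of the original code) of how seq2seq.sequence_loss, as used in the training branch above, masks out padded steps; with both average flags set it is roughly sum(cross_entropy * weights) / sum(weights):

import numpy as np
import tensorflow as tf
from tensorflow.contrib import seq2seq

logits = tf.constant(np.random.randn(2, 3, 5).astype(np.float32))  # (batch=2, time=3, vocab=5)
targets = tf.constant([[1, 2, 0], [3, 0, 0]], dtype=tf.int32)       # padded with 0
weights = tf.sequence_mask([2, 1], maxlen=3, dtype=tf.float32)      # zeros out the padded steps

loss = seq2seq.sequence_loss(logits, targets, weights,
                             average_across_timesteps=True,
                             average_across_batch=True)
with tf.Session() as sess:
    print(sess.run(loss))  # scalar: average cross-entropy over the 3 unmasked positions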
Example #5
    def build_encoder(self):
        """构建编码器
          返回编码器的输出以及各个层最后状态的输出
        """
        # print("构建编码器")
        with tf.variable_scope('encoder'):
            # Build the encoder_cell
            encoder_cell = self.build_encoder_cell()

            # Encoder embedding. tf.device() pins the following ops to a given GPU/CPU,
            # e.g. tf.device('/gpu:1') runs them on the second GPU
            with tf.device(_get_embed_device(self.input_vocab_size)):

                # Load pretrained embeddings
                if self.pretrained_embedding:

                    self.encoder_embeddings = tf.Variable(
                        tf.constant(
                            0.0,
                            shape=(self.input_vocab_size, self.embedding_size)
                        ),
                        trainable=True,
                        name='embeddings'
                    )
                    self.encoder_embeddings_placeholder = tf.placeholder(
                        tf.float32,
                        (self.input_vocab_size, self.embedding_size)
                    )
                    self.encoder_embeddings_init = \
                        self.encoder_embeddings.assign(self.encoder_embeddings_placeholder)

                else:
                    # get_variable fetches an existing variable or creates a new one,
                    # initialized with the given initializer
                    self.encoder_embeddings = tf.get_variable(
                        name='embedding',
                        shape=(self.input_vocab_size, self.embedding_size),
                        initializer=self.initializer,
                        dtype=tf.float32
                    )

            # Embedded inputs, shape = (batch_size, time_step, embedding_size)
            self.encoder_inputs_embedded = tf.nn.embedding_lookup(
                params=self.encoder_embeddings,
                ids=self.encoder_inputs
            )
            
            # With residual connections, project the inputs to the hidden size first
            if self.use_residual:
                self.encoder_inputs_embedded = \
                    layers.dense(self.encoder_inputs_embedded,
                                 self.hidden_units,
                                 use_bias=False,
                                 name='encoder_residual_projection')

            # Encoder inputs
            inputs = self.encoder_inputs_embedded
            # time_major changes which dimension comes first
            if self.time_major:
                inputs = tf.transpose(inputs, (1, 0, 2))

            if not self.bidirectional:
                # Unidirectional RNN
                (
                    encoder_outputs,
                    encoder_state
                ) = tf.nn.dynamic_rnn(
                    cell=encoder_cell,
                    inputs=inputs,
                    sequence_length=self.encoder_inputs_length,
                    dtype=tf.float32,
                    time_major=self.time_major,
                    parallel_iterations=self.parallel_iterations,
                    swap_memory=True
                )
            else:
                # Bidirectional RNN is a bit more involved
                # Build the backward encoder cell; output is the RNN output,
                # state is e.g. the LSTM's h and c
                encoder_cell_bw = self.build_encoder_cell()
                (
                    (encoder_fw_outputs, encoder_bw_outputs),
                    (encoder_fw_state, encoder_bw_state)
                ) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw=encoder_cell,
                    cell_bw=encoder_cell_bw,
                    inputs=inputs,
                    sequence_length=self.encoder_inputs_length,
                    dtype=tf.float32,
                    time_major=self.time_major,
                    parallel_iterations=self.parallel_iterations,
                    swap_memory=True
                )

                # First, concatenate the outputs of both directions
                encoder_outputs = tf.concat(
                    (encoder_fw_outputs, encoder_bw_outputs), 2)

                encoder_state = []
                for i in range(self.depth):
                    encoder_state.append(encoder_fw_state[i])  # forward
                    encoder_state.append(encoder_bw_state[i])  # backward
                encoder_state = tuple(encoder_state)
                # encoder_state = (encoder_fw_state[0], encoder_bw_state[0], ...)

            return encoder_outputs, encoder_state

    def build_decoder(self, encoder_outputs, encoder_state):
        """构建解码器
        """
        with tf.variable_scope('decoder') as decoder_scope:
            (self.decoder_cell,
             self.decoder_initial_state) = self.build_decoder_cell(
                 encoder_outputs, encoder_state)

            # Decoder embedding
            with tf.device(_get_embed_device(self.target_vocab_size)):
                if self.share_embedding:
                    self.decoder_embeddings = self.encoder_embeddings
                elif self.pretrained_embedding:

                    self.decoder_embeddings = tf.Variable(tf.constant(
                        0.0,
                        shape=(self.target_vocab_size, self.embedding_size)),
                                                          trainable=True,
                                                          name='embeddings')
                    self.decoder_embeddings_placeholder = tf.placeholder(
                        tf.float32,
                        (self.target_vocab_size, self.embedding_size))
                    self.decoder_embeddings_init = \
                        self.decoder_embeddings.assign(
                            self.decoder_embeddings_placeholder)
                else:
                    self.decoder_embeddings = tf.get_variable(
                        name='embeddings',
                        shape=(self.target_vocab_size, self.embedding_size),
                        initializer=self.initializer,
                        dtype=tf.float32)

            self.decoder_output_projection = layers.Dense(
                self.target_vocab_size,
                dtype=tf.float32,
                use_bias=False,
                name='decoder_output_projection')

            if self.mode == 'train':
                self.decoder_inputs_embedded = tf.nn.embedding_lookup(
                    params=self.decoder_embeddings,
                    ids=self.decoder_inputs_train)
                inputs = self.decoder_inputs_embedded

                if self.time_major:
                    inputs = tf.transpose(inputs, (1, 0, 2))

                training_helper = seq2seq.TrainingHelper(
                    inputs=inputs,
                    sequence_length=self.decoder_inputs_length,
                    time_major=self.time_major,
                    name='training_helper')

                # Do not apply output_layer here during training: it would run the
                # projection at every time step, which is slow.
                # Note: for this trick to work, the scope argument of dynamic_decode must be set.
                training_decoder = seq2seq.BasicDecoder(
                    cell=self.decoder_cell,
                    helper=training_helper,
                    initial_state=self.decoder_initial_state,
                )

                # Maximum decoder time_steps in current batch
                max_decoder_length = tf.reduce_max(self.decoder_inputs_length)

                (
                    outputs,
                    self.final_state,  # contain attention
                    _  # self.final_sequence_lengths
                ) = seq2seq.dynamic_decode(
                    decoder=training_decoder,
                    output_time_major=self.time_major,
                    impute_finished=True,
                    maximum_iterations=max_decoder_length,
                    parallel_iterations=self.parallel_iterations,
                    swap_memory=True,
                    scope=decoder_scope)

                self.decoder_logits_train = self.decoder_output_projection(
                    outputs.rnn_output)

                # masks: masking for valid and padded time steps,
                # [batch_size, max_time_step + 1]
                self.masks = tf.sequence_mask(
                    lengths=self.decoder_inputs_length,
                    maxlen=max_decoder_length,
                    dtype=tf.float32,
                    name='masks')

                decoder_logits_train = self.decoder_logits_train
                if self.time_major:
                    decoder_logits_train = tf.transpose(
                        decoder_logits_train, (1, 0, 2))

                self.decoder_pred_train = tf.argmax(decoder_logits_train,
                                                    axis=-1,
                                                    name='decoder_pred_train')

                # The variables below are used for special training schemes
                # (custom rewards; effectively the masks are modified)
                # train_entropy = cross entropy
                self.train_entropy = \
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        labels=self.decoder_inputs,
                        logits=decoder_logits_train)

                self.masks_rewards = self.masks * self.rewards

                self.loss_rewards = seq2seq.sequence_loss(
                    logits=decoder_logits_train,
                    targets=self.decoder_inputs,
                    weights=self.masks_rewards,
                    average_across_timesteps=True,
                    average_across_batch=True,
                )

                self.loss = seq2seq.sequence_loss(
                    logits=decoder_logits_train,
                    targets=self.decoder_inputs,
                    weights=self.masks,
                    average_across_timesteps=True,
                    average_across_batch=True,
                )

                self.loss_add = self.loss + self.add_loss

            elif self.mode == 'decode':
                # Inference mode (not training)

                start_tokens = tf.tile([WordSequence.START], [self.batch_size])
                end_token = WordSequence.END

                def embed_and_input_proj(inputs):
                    """输入层的投影层wrapper
                    """
                    return tf.nn.embedding_lookup(self.decoder_embeddings,
                                                  inputs)

                if not self.use_beamsearch_decode:
                    # Helper to feed inputs for greedy decoding:
                    # uses the argmax of the output
                    decoding_helper = seq2seq.GreedyEmbeddingHelper(
                        start_tokens=start_tokens,
                        end_token=end_token,
                        embedding=embed_and_input_proj)
                    # Basic decoder performs greedy decoding at each time step
                    # print("building greedy decoder..")
                    inference_decoder = seq2seq.BasicDecoder(
                        cell=self.decoder_cell,
                        helper=decoding_helper,
                        initial_state=self.decoder_initial_state,
                        output_layer=self.decoder_output_projection)
                else:
                    # Beamsearch is used to approximately
                    # find the most likely translation
                    # print("building beamsearch decoder..")
                    inference_decoder = BeamSearchDecoder(
                        cell=self.decoder_cell,
                        embedding=embed_and_input_proj,
                        start_tokens=start_tokens,
                        end_token=end_token,
                        initial_state=self.decoder_initial_state,
                        beam_width=self.beam_width,
                        output_layer=self.decoder_output_projection,
                    )

                if self.max_decode_step is not None:
                    max_decode_step = self.max_decode_step
                else:
                    # By default, decode up to 4x the input length
                    max_decode_step = tf.round(
                        tf.reduce_max(self.encoder_inputs_length) * 4)

                (
                    self.decoder_outputs_decode,
                    self.final_state,
                    _  # self.decoder_outputs_length_decode
                ) = (
                    seq2seq.dynamic_decode(
                        decoder=inference_decoder,
                        output_time_major=self.time_major,
                        # impute_finished=True,	# error occurs
                        maximum_iterations=max_decode_step,
                        parallel_iterations=self.parallel_iterations,
                        swap_memory=True,
                        scope=decoder_scope))

                if not self.use_beamsearch_decode:

                    dod = self.decoder_outputs_decode
                    self.decoder_pred_decode = dod.sample_id

                    if self.time_major:
                        self.decoder_pred_decode = tf.transpose(
                            self.decoder_pred_decode, (1, 0))

                else:
                    self.decoder_pred_decode = \
                        self.decoder_outputs_decode.predicted_ids

                    if self.time_major:
                        self.decoder_pred_decode = tf.transpose(
                            self.decoder_pred_decode, (1, 0, 2))

                    self.decoder_pred_decode = tf.transpose(
                        self.decoder_pred_decode, perm=[0, 2, 1])
                    dod = self.decoder_outputs_decode
                    self.beam_prob = dod.beam_search_decoder_output.scores
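A tiny self-contained sketch (an assumption, not part of the original code) of the final transpose applied to the beam-search output above, which reorders predicted_ids from [batch, time, beam] to [batch, beam, time] so each beam's token sequence is contiguous:

import numpy as np
import tensorflow as tf

predicted_ids = tf.constant(np.arange(2 * 3 * 4).reshape((2, 3, 4)))  # (batch=2, time=3, beam=4)
reordered = tf.transpose(predicted_ids, perm=[0, 2, 1])               # -> (batch, beam, time)
print(reordered.shape)  # (2, 4, 3): one id sequence per beam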
Example #7
    def build_encoder(self):
        """构建编码器"""

        # print("构建编码器")
        with tf.variable_scope('encoder'):
            # Build the encoder_cell
            encoder_cell = self.build_encoder_cell()

            # Encoder embedding, placed on GPU or CPU
            with tf.device(_get_embed_device(self.input_vocab_size)):

                # Load pretrained embeddings
                if self.pretrained_embedding:

                    self.encoder_embeddings = tf.Variable(tf.constant(
                        0.0,
                        shape=(self.input_vocab_size, self.embedding_size)),
                                                          trainable=True,
                                                          name='embeddings')

                    self.encoder_embeddings_placeholder = tf.placeholder(
                        tf.float32,
                        (self.input_vocab_size, self.embedding_size))

                    self.encoder_embeddings_init = self.encoder_embeddings.assign(
                        self.encoder_embeddings_placeholder)

                else:
                    self.encoder_embeddings = tf.get_variable(
                        name='embedding',
                        shape=(self.input_vocab_size, self.embedding_size),
                        initializer=self.initializer,
                        dtype=tf.float32)

            # Embedded inputs, shape = (batch_size, time_step, embedding_size)
            self.encoder_inputs_embedded = tf.nn.embedding_lookup(
                params=self.encoder_embeddings, ids=self.encoder_inputs)

            # When residual connections are used
            if self.use_residual:
                self.encoder_inputs_embedded = layers.dense(
                    self.encoder_inputs_embedded,
                    self.hidden_units,
                    use_bias=False,
                    name='encoder_residual_projection')

            inputs = self.encoder_inputs_embedded
            if self.time_major:
                inputs = tf.transpose(inputs, (1, 0, 2))

            if not self.bidirectional:
                # Unidirectional RNN
                (encoder_outputs, encoder_state) = tf.nn.dynamic_rnn(
                    cell=encoder_cell,
                    inputs=inputs,
                    sequence_length=self.encoder_inputs_length,
                    dtype=tf.float32,
                    time_major=self.time_major,
                    parallel_iterations=self.parallel_iterations,
                    swap_memory=True)
            else:
                # Bidirectional RNN
                # encoder_cell_fw = self.build_encoder_cell()
                encoder_cell_bw = self.build_encoder_cell()  # backward cell
                ((encoder_fw_outputs, encoder_bw_outputs),
                 (encoder_fw_state,
                  encoder_bw_state)) = tf.nn.bidirectional_dynamic_rnn(
                      cell_fw=encoder_cell,
                      cell_bw=encoder_cell_bw,
                      inputs=inputs,
                      sequence_length=self.encoder_inputs_length,
                      dtype=tf.float32,
                      time_major=self.time_major,
                      parallel_iterations=self.parallel_iterations,
                      swap_memory=True)

                # First, concatenate the outputs of both directions
                encoder_outputs = tf.concat(
                    (encoder_fw_outputs, encoder_bw_outputs), 2)

                encoder_state = []
                for i in range(self.depth):
                    encoder_state.append(encoder_fw_state[i])
                    encoder_state.append(encoder_bw_state[i])
                encoder_state = tuple(encoder_state)

            return encoder_outputs, encoder_state
Example #8
    def build_decoder(self):
        """构建解码器
        """
        with tf.variable_scope('decoder') as decoder_scope:
            # Building decoder_cell and decoder_initial_state
            (self.decoder_cell,
             self.decoder_initial_state) = self.build_decoder_cell()

            # Decoder embedding
            if self.share_embedding:
                self.decoder_embeddings = self.encoder_embeddings
            else:
                with tf.device(_get_embed_device(self.target_vocab_size)):
                    self.decoder_embeddings = tf.get_variable(
                        name='embeddings',
                        shape=(self.target_vocab_size, self.embedding_size),
                        initializer=self.initializer,
                        dtype=tf.float32)

            # When using residual connections, align the input dimensions
            if self.use_residual:
                self.decoder_embeddings = tf.layers.dense(
                    self.decoder_embeddings, self.hidden_units * 2)

            # On Using Very Large Target Vocabulary
            # for Neural Machine Translation
            # https://arxiv.org/pdf/1412.2007v2.pdf

            # Input projection layer to feed embedded inputs to the cell
            # ** Essential when use_residual=True to match input/output dims
            hidden_units = self.hidden_units
            if self.bidirectional:
                hidden_units *= 2

            self.decoder_output_projection = layers.Dense(
                self.target_vocab_size,
                dtype=tf.float32,
                # use_bias=False,
                name='decoder_output_projection')

            if self.mode == 'train':
                # decoder_inputs_embedded:
                # [batch_size, max_time_step + 1, embedding_size]
                self.decoder_inputs_embedded = tf.nn.embedding_lookup(
                    params=self.decoder_embeddings,
                    ids=self.decoder_inputs_train)

                # Helper to feed inputs for training:
                # read inputs from dense ground truth vectors
                inputs = self.decoder_inputs_embedded

                if self.time_major:
                    inputs = tf.transpose(inputs, (1, 0, 2))

                training_helper = seq2seq.TrainingHelper(
                    inputs=inputs,
                    sequence_length=self.decoder_inputs_length_train,
                    time_major=self.time_major,
                    name='training_helper')

                # Do not apply output_layer here during training: it would run the
                # projection at every time step, which is slow.
                # Note: for this trick to work, the scope argument of dynamic_decode must be set.
                training_decoder = seq2seq.BasicDecoder(
                    cell=self.decoder_cell,
                    helper=training_helper,
                    initial_state=self.decoder_initial_state,
                    # output_layer=self.decoder_output_projection
                )

                # Maximum decoder time_steps in current batch
                max_decoder_length = tf.reduce_max(
                    self.decoder_inputs_length_train)

                # decoder_outputs_train: BasicDecoderOutput
                #     namedtuple(rnn_outputs, sample_id)
                # decoder_outputs_train.rnn_output:
                #     if output_time_major=False:
                #         [batch_size, max_time_step + 1, num_decoder_symbols]
                #     if output_time_major=True:
                #         [max_time_step + 1, batch_size, num_decoder_symbols]
                # decoder_outputs_train.sample_id: [batch_size], tf.int32

                (
                    outputs,
                    self.final_state,  # contain attention
                    _  # self.final_sequence_lengths
                ) = seq2seq.dynamic_decode(
                    decoder=training_decoder,
                    output_time_major=self.time_major,
                    impute_finished=True,
                    maximum_iterations=max_decoder_length,
                    parallel_iterations=self.parallel_iterations,
                    swap_memory=True,
                    scope=decoder_scope)

                # More efficient to do the projection
                # on the batch-time-concatenated tensor
                # logits_train:
                # [batch_size, max_time_step + 1, num_decoder_symbols]
                # During training, apply the output_layer projection to all outputs in one pass.
                # The official NMT code reports a 10-20% speedup from this;
                # in practice the speedup seen here was even larger.
                self.decoder_logits_train = self.decoder_output_projection(
                    outputs.rnn_output)

                # masks: masking for valid and padded time steps,
                # [batch_size, max_time_step + 1]
                self.masks = tf.sequence_mask(
                    lengths=self.decoder_inputs_length_train,
                    maxlen=max_decoder_length,
                    dtype=tf.float32,
                    name='masks')

                # Computes per word average cross-entropy over a batch
                # Internally calls
                # 'nn_ops.sparse_softmax_cross_entropy_with_logits' by default

                decoder_logits_train = self.decoder_logits_train
                if self.time_major:
                    decoder_logits_train = tf.transpose(
                        decoder_logits_train, (1, 0, 2))

                self.decoder_pred_train = tf.argmax(decoder_logits_train,
                                                    axis=-1,
                                                    name='decoder_pred_train')

                # The variables below are used for special training schemes
                # (custom rewards; effectively the masks are modified)
                self.train_entropy = \
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        labels=self.decoder_targets_train,
                        logits=decoder_logits_train)
                self.masks_rewards = self.masks * self.rewards
                self.loss_rewards = seq2seq.sequence_loss(
                    logits=decoder_logits_train,
                    targets=self.decoder_targets_train,
                    weights=self.masks_rewards,
                    average_across_timesteps=True,
                    average_across_batch=True,
                )

                self.loss = seq2seq.sequence_loss(
                    logits=decoder_logits_train,
                    targets=self.decoder_targets_train,
                    weights=self.masks,
                    average_across_timesteps=True,
                    average_across_batch=True,
                )

                self.loss_add = self.loss + self.add_loss

            elif self.mode == 'decode':
                # Inference mode (not training)

                start_tokens = tf.tile([WordSequence.START], [self.batch_size])
                end_token = WordSequence.END

                def embed_and_input_proj(inputs):
                    """输入层的投影层wrapper
                    """
                    return tf.nn.embedding_lookup(self.decoder_embeddings,
                                                  inputs)

                if not self.use_beamsearch_decode:
                    # Helper to feed inputs for greedy decoding:
                    # uses the argmax of the output
                    decoding_helper = seq2seq.GreedyEmbeddingHelper(
                        start_tokens=start_tokens,
                        end_token=end_token,
                        embedding=embed_and_input_proj)
                    # Basic decoder performs greedy decoding at each time step
                    # print("building greedy decoder..")
                    inference_decoder = seq2seq.BasicDecoder(
                        cell=self.decoder_cell,
                        helper=decoding_helper,
                        initial_state=self.decoder_initial_state,
                        output_layer=self.decoder_output_projection)
                else:
                    # Beamsearch is used to approximately
                    # find the most likely translation
                    # print("building beamsearch decoder..")
                    inference_decoder = BeamSearchDecoder(
                        cell=self.decoder_cell,
                        embedding=embed_and_input_proj,
                        start_tokens=start_tokens,
                        end_token=end_token,
                        initial_state=self.decoder_initial_state,
                        beam_width=self.beam_width,
                        output_layer=self.decoder_output_projection,
                    )

                # For GreedyDecoder, return
                # decoder_outputs_decode: BasicDecoderOutput instance
                #     namedtuple(rnn_outputs, sample_id)
                # decoder_outputs_decode.rnn_output:
                # if output_time_major=False:
                #     [batch_size, max_time_step, num_decoder_symbols]
                # if output_time_major=True
                #     [max_time_step, batch_size, num_decoder_symbols]
                # decoder_outputs_decode.sample_id:
                # if output_time_major=False
                #     [batch_size, max_time_step], tf.int32
                # if output_time_major=True
                #     [max_time_step, batch_size], tf.int32

                # For BeamSearchDecoder, return
                # decoder_outputs_decode: FinalBeamSearchDecoderOutput instance
                #     namedtuple(predicted_ids, beam_search_decoder_output)
                # decoder_outputs_decode.predicted_ids:
                # if output_time_major=False:
                #     [batch_size, max_time_step, beam_width]
                # if output_time_major=True
                #     [max_time_step, batch_size, beam_width]
                # decoder_outputs_decode.beam_search_decoder_output:
                #     BeamSearchDecoderOutput instance
                #     namedtuple(scores, predicted_ids, parent_ids)

                # A possible maximum-length choice mentioned in the official docs:
                # maximum_iterations = tf.round(tf.reduce_max(source_sequence_length) * 2)
                # https://www.tensorflow.org/tutorials/seq2seq

                if self.max_decode_step is not None:
                    max_decode_step = self.max_decode_step
                else:
                    # By default, decode up to 4x the input length
                    max_decode_step = tf.round(
                        tf.reduce_max(self.encoder_inputs_length) * 4)

                (
                    self.decoder_outputs_decode,
                    self.final_state,
                    _  # self.decoder_outputs_length_decode
                ) = (
                    seq2seq.dynamic_decode(
                        decoder=inference_decoder,
                        output_time_major=self.time_major,
                        # impute_finished=True,	# error occurs
                        maximum_iterations=max_decode_step,
                        parallel_iterations=self.parallel_iterations,
                        swap_memory=True,
                        scope=decoder_scope))

                if not self.use_beamsearch_decode:
                    # decoder_outputs_decode.sample_id:
                    #     [batch_size, max_time_step]
                    # Or use argmax to find decoder symbols to emit:
                    # self.decoder_pred_decode = tf.argmax(
                    #     self.decoder_outputs_decode.rnn_output,
                    #     axis=-1, name='decoder_pred_decode')

                    # Here, we use expand_dims to be compatible with
                    # the result of the beamsearch decoder
                    # decoder_pred_decode:
                    #     [batch_size, max_time_step, 1] (output_major=False)

                    # self.decoder_pred_decode = tf.expand_dims(
                    #     self.decoder_outputs_decode.sample_id,
                    #     -1
                    # )

                    dod = self.decoder_outputs_decode
                    self.decoder_pred_decode = dod.sample_id

                    if self.time_major:
                        self.decoder_pred_decode = tf.transpose(
                            self.decoder_pred_decode, (1, 0))

                else:
                    # Use beam search to approximately
                    # find the most likely translation
                    # decoder_pred_decode:
                    # [batch_size, max_time_step, beam_width] (output_major=False)
                    self.decoder_pred_decode = \
                        self.decoder_outputs_decode.predicted_ids

                    if self.time_major:
                        self.decoder_pred_decode = tf.transpose(
                            self.decoder_pred_decode, (1, 0, 2))

                    self.decoder_pred_decode = tf.transpose(
                        self.decoder_pred_decode, perm=[0, 2, 1])
                    dod = self.decoder_outputs_decode
                    self.beam_prob = dod.beam_search_decoder_output.scores
Example #9
    def build_encoder(self):
        """构建编码器
        """
        # print("构建编码器")
        with tf.variable_scope('encoder'):
            # Build the encoder_cell
            self.encoder_cell = self.build_encoder_cell()

            # Encoder embedding
            with tf.device(_get_embed_device(self.input_vocab_size)):
                self.encoder_embeddings = tf.get_variable(
                    name='embedding',
                    shape=(self.input_vocab_size, self.embedding_size),
                    initializer=self.initializer,
                    dtype=tf.float32)

            # Embedded inputs, shape = (batch_size, time_step, embedding_size)
            self.encoder_inputs_embedded = tf.nn.embedding_lookup(
                params=self.encoder_embeddings, ids=self.encoder_inputs)

            # Encode input sequences into context vectors:
            # encoder_outputs: [batch_size, max_time_step, cell_output_size]
            # encoder_state: [batch_size, cell_output_size]

            inputs = self.encoder_inputs_embedded
            if self.time_major:
                inputs = tf.transpose(inputs, (1, 0, 2))

            if not self.bidirectional:
                # Unidirectional RNN
                (self.encoder_outputs,
                 self.encoder_last_state) = tf.nn.dynamic_rnn(
                     cell=self.encoder_cell,
                     inputs=inputs,
                     sequence_length=self.encoder_inputs_length,
                     dtype=tf.float32,
                     time_major=self.time_major,
                     parallel_iterations=self.parallel_iterations,
                     swap_memory=True)
            else:
                # Bidirectional RNN is a bit more involved
                self.encoder_cell_bw = self.build_encoder_cell()
                ((encoder_fw_outputs, encoder_bw_outputs),
                 (encoder_fw_state,
                  encoder_bw_state)) = tf.nn.bidirectional_dynamic_rnn(
                      cell_fw=self.encoder_cell,
                      cell_bw=self.encoder_cell_bw,
                      inputs=inputs,
                      sequence_length=self.encoder_inputs_length,
                      dtype=tf.float32,
                      time_major=self.time_major,
                      parallel_iterations=self.parallel_iterations,
                      swap_memory=True)

                # First concatenate the outputs of the forward and backward RNNs
                self.encoder_outputs = tf.concat(
                    (encoder_fw_outputs, encoder_bw_outputs), 2)

                # Merge the forward and backward states in the bidirectional case
                # QHD
                # Borrowed from
                # https://github.com/ematvey/tensorflow-seq2seq-tutorials/blob/master/model_new.py
                # with modifications: the original code did not handle multi-layer cells (MultiRNNCell)
                if isinstance(encoder_fw_state[0], LSTMStateTuple):
                    # LSTM cells
                    self.encoder_last_state = tuple([
                        LSTMStateTuple(
                            c=tf.concat(
                                (encoder_fw_state[i].c, encoder_bw_state[i].c),
                                1),
                            h=tf.concat(
                                (encoder_fw_state[i].h, encoder_bw_state[i].h),
                                1)) for i in range(len(encoder_fw_state))
                    ])
                elif isinstance(encoder_fw_state[0], tf.Tensor):
                    # A GRU layer has a single state tensor (tf.Tensor),
                    # so concatenating per layer is enough
                    self.encoder_last_state = tuple([
                        tf.concat((encoder_fw_state[i], encoder_bw_state[i]),
                                  1,
                                  name='bidirectional_concat_{}'.format(i))
                        for i in range(len(encoder_fw_state))
                    ])
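
The state-merging code above is the subtle part of a multi-layer bidirectional encoder. Below is a minimal standalone sketch of the same trick, assuming the TensorFlow 1.x tf.nn.rnn_cell API and dummy shapes: the forward and backward LSTM states of each layer are concatenated, so the per-layer state width doubles, which is also why the matching decoder cell usually has to be built with 2 * hidden_units.

import tensorflow as tf

batch_size, hidden_units, depth = 4, 8, 2

def make_state():
    # One LSTMStateTuple per layer, as MultiRNNCell would return
    return tuple(
        tf.nn.rnn_cell.LSTMStateTuple(c=tf.zeros((batch_size, hidden_units)),
                                      h=tf.zeros((batch_size, hidden_units)))
        for _ in range(depth))

encoder_fw_state, encoder_bw_state = make_state(), make_state()

merged_state = tuple(
    tf.nn.rnn_cell.LSTMStateTuple(
        c=tf.concat((encoder_fw_state[i].c, encoder_bw_state[i].c), 1),
        h=tf.concat((encoder_fw_state[i].h, encoder_bw_state[i].h), 1))
    for i in range(depth))

print(merged_state[0].c.shape)  # (4, 16): the per-layer state size is doubled
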
Example #10
    def build_encoder(self):
        """
        构建编码器
        :return:encoder_outputs, 最后一层rnn的输出
                encoder_state,每一层的final state
        """
        with tf.variable_scope('encoder'):
            encoder_cell = self.build_encoder_cell()

            # Encoder embedding
            with tf.device(_get_embed_device(self.input_vocab_size)):
                # Load a pre-trained embedding
                if self.pretrained_embedding:
                    # Pretrained mode: the variable starts as zeros and is
                    # filled in later via the placeholder / assign op below
                    self.encoder_embeddings = tf.Variable(
                        tf.constant(0.0, shape=(self.input_vocab_size,
                                                self.embedding_size)),
                        trainable=True,
                        name='embeddings')
                    self.encoder_embeddings_placeholder = tf.placeholder(
                        tf.float32,
                        (self.input_vocab_size, self.embedding_size))
                    self.encoder_embeddings_init = \
                        self.encoder_embeddings.assign(
                            self.encoder_embeddings_placeholder)

                else:
                    self.encoder_embeddings = tf.get_variable(
                        name='embedding',
                        shape=(self.input_vocab_size, self.embedding_size),
                        initializer=self.initializer,
                        dtype=tf.float32)

        # Embedded inputs, shape = (batch_size, time_step, embedding_size)
        self.encoder_inputs_embedded = tf.nn.embedding_lookup(
            params=self.encoder_embeddings, ids=self.encoder_inputs)

        # When using residual connections, first project the inputs to the hidden size
        if self.use_residual:
            self.encoder_inputs_embedded = \
                layers.dense(self.encoder_inputs_embedded,
                             self.hidden_units,
                             use_bias=False,
                             name='encoder_residual_projection')

        inputs = self.encoder_inputs_embedded
        if self.time_major:
            inputs = tf.transpose(inputs, (1, 0, 2))

        if not self.bidirectional:
            (encoder_outputs, encoder_state) = tf.nn.dynamic_rnn(
                cell=encoder_cell,
                inputs=inputs,
                sequence_length=self.encoder_inputs_length,
                dtype=tf.float32,
                time_major=self.time_major,
                parallel_iterations=self.parallel_iterations,
                swap_memory=True  # dynamic RNN may swap memory between GPU and CPU
            )
        else:
            encoder_cell_bw = self.build_encoder_cell()
            ((encoder_fw_outputs, encoder_bw_outputs),
             (encoder_fw_state,
              encoder_bw_state)) = tf.nn.bidirectional_dynamic_rnn(
                  cell_fw=encoder_cell,
                  cell_bw=encoder_cell_bw,
                  inputs=inputs,
                  sequence_length=self.encoder_inputs_length,
                  dtype=tf.float32,
                  time_major=self.time_major,
                  parallel_iterations=self.parallel_iterations,
                  swap_memory=True)

            encoder_outputs = tf.concat(
                (encoder_fw_outputs, encoder_bw_outputs), 2)

            encoder_state = []
            for i in range(self.depth):
                c_fw, h_fw = encoder_fw_state[i]
                c_bw, h_bw = encoder_bw_state[i]
                c = tf.concat((c_fw, c_bw), axis=-1)
                h = tf.concat((h_fw, h_bw), axis=-1)
                encoder_state.append(LSTMStateTuple(c=c, h=h))
            encoder_state = tuple(encoder_state)

        return encoder_outputs, encoder_state
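
This variant (and several below) uses the placeholder-plus-assign pattern for pretrained embeddings: the variable starts as zeros and is filled once at session start, so the (possibly large) embedding matrix never becomes a constant baked into the graph. A minimal self-contained sketch of that pattern, with hypothetical sizes and random data standing in for real pretrained vectors:

import numpy as np
import tensorflow as tf

vocab_size, embedding_size = 1000, 64

# Zero-initialized variable, filled from a placeholder via an assign op
embeddings = tf.Variable(tf.constant(0.0, shape=(vocab_size, embedding_size)),
                         trainable=True, name='embeddings')
embeddings_placeholder = tf.placeholder(tf.float32, (vocab_size, embedding_size))
embeddings_init = embeddings.assign(embeddings_placeholder)

# Stand-in for a real pretrained matrix (e.g. word2vec / GloVe vectors)
pretrained = np.random.rand(vocab_size, embedding_size).astype(np.float32)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Run the assign op once, before training starts
    sess.run(embeddings_init, feed_dict={embeddings_placeholder: pretrained})
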
Example #11
    def build_decoder(self, encoder_outputs, encoder_state):
        """ 
        构建解码器
        """
        with tf.variable_scope('decoder') as decoder_scope:
            (self.decoder_cell,
             self.decoder_initial_state) = self.build_decoder_cell(
                 encoder_outputs, encoder_state)
            # Decoder embedding
            with tf.device(_get_embed_device(self.target_vocab_size)):
                if self.share_embedding:
                    self.decoder_embeddings = self.encoder_embeddings
                elif self.pretrained_embedding:

                    self.decoder_embeddings = tf.Variable(
                        tf.constant(0.0, shape=(self.target_vocab_size,
                                                self.embedding_size)),
                        trainable=True,
                        name='embeddings')
                    self.decoder_embeddings_placeholder = tf.placeholder(
                        tf.float32,
                        (self.target_vocab_size, self.embedding_size))
                    self.decoder_embeddings_init = self.decoder_embeddings.assign(
                        self.decoder_embeddings_placeholder)
                else:
                    self.decoder_embeddings = tf.get_variable(
                        name='embeddings',
                        shape=(self.target_vocab_size, self.embedding_size),
                        initializer=self.initializer,
                        dtype=tf.float32)
            self.decoder_output_projection = layers.Dense(
                self.target_vocab_size,
                dtype=tf.float32,
                use_bias=False,
                name='decoder_output_projection')

            if self.mode == 'train':
                self.decoder_inputs_embedded = tf.nn.embedding_lookup(
                    params=self.decoder_embeddings,
                    ids=self.decoder_inputs_train)

                inputs = self.decoder_inputs_embedded
                if self.time_major:
                    inputs = tf.transpose(inputs, (1, 0, 2))

                training_helper = seq2seq.TrainingHelper(
                    # derives the next-step input from the ground-truth targets (teacher forcing)
                    inputs=inputs,
                    sequence_length=self.decoder_inputs_length,
                    time_major=self.time_major,
                    name='training_helper')
                # The output_layer is not applied here during training, because
                # projecting at every time step is slow; note that for this trick
                # to work, the scope argument of dynamic_decode must be set
                training_decoder = seq2seq.BasicDecoder(
                    cell=self.decoder_cell,
                    helper=training_helper,
                    initial_state=self.decoder_initial_state
                    # output_layer=self.decoder_output_projection  # output projection layer, maps rnn_size to vocab_size
                )
                # maximum number of decoder time steps in the current batch
                max_decoder_length = tf.reduce_max(self.decoder_inputs_length)

                outputs, self.final_state, _ = seq2seq.dynamic_decode(
                    decoder=training_decoder,
                    output_time_major=self.time_major,
                    # Boolean: when True, the state of the last time step is copied
                    # through and the outputs are zeroed once a sequence finishes,
                    # which keeps the final state and outputs correct and lets
                    # backprop ignore finished steps, at the cost of some speed.
                    impute_finished=True,
                    # Maximum number of decoding steps: for training usually the
                    # maximum decoder_inputs_length; for inference set any desired
                    # maximum length.  Decoding stops at <eos> or at this limit.
                    maximum_iterations=max_decoder_length,
                    # Number of loop iterations to run in parallel
                    parallel_iterations=self.parallel_iterations,
                    swap_memory=True,
                    scope=decoder_scope)

                self.decoder_logits_train = self.decoder_output_projection(
                    outputs.rnn_output)
                self.masks = tf.sequence_mask(
                    # mask marking the valid length of each target sequence
                    lengths=self.decoder_inputs_length,
                    maxlen=max_decoder_length,
                    dtype=tf.float32,
                    name='masks')

                decoder_logits_train = self.decoder_logits_train
                if self.time_major:
                    decoder_logits_train = tf.transpose(
                        decoder_logits_train, (1, 0, 2))

                self.decoder_pred_train = tf.argmax(decoder_logits_train,
                                                    axis=-1,
                                                    name='decoder_pred_train')

                self.train_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=self.decoder_inputs,  # ground-truth targets y
                    logits=decoder_logits_train  # predicted logits y_
                )

                self.masks_rewards = self.masks * self.rewards

                self.loss_rewards = seq2seq.sequence_loss(
                    # [batch_size, sequence_length, num_decoder_symbols]
                    logits=decoder_logits_train,
                    # [batch_size, sequence_length]; no one-hot encoding needed
                    targets=self.decoder_inputs,
                    # [batch_size, sequence_length]; the mask filters padding
                    # positions out of the loss, making it more accurate
                    weights=self.masks_rewards,
                    average_across_timesteps=True,
                    average_across_batch=True)

                self.loss = seq2seq.sequence_loss(
                    # sequence loss
                    # [batch_size, sequence_length, num_decoder_symbols]
                    logits=decoder_logits_train,
                    # [batch_size, sequence_length]; no one-hot encoding needed
                    targets=self.decoder_inputs,
                    # mask that filters padding positions out of the loss
                    weights=self.masks,
                    average_across_timesteps=True,
                    average_across_batch=True)

                self.loss_add = self.loss + self.add_loss

            elif self.mode == 'decode':
                start_tokens = tf.tile([WordSequence.START], [self.batch_size])
                end_token = WordSequence.END

                def embed_and_input_proj(inputs):
                    return tf.nn.embedding_lookup(self.decoder_embeddings,
                                                  inputs)

                if not self.use_beamsearch_decode:
                    decoding_helper = seq2seq.GreedyEmbeddingHelper(
                        start_tokens=start_tokens,
                        end_token=end_token,
                        embedding=embed_and_input_proj)

                    inference_decoder = seq2seq.BasicDecoder(
                        cell=self.decoder_cell,
                        helper=decoding_helper,
                        initial_state=self.decoder_initial_state,
                        output_layer=self.decoder_output_projection)
                else:
                    inference_decoder = BeamSearchDecoder(
                        cell=self.decoder_cell,
                        embedding=embed_and_input_proj,
                        start_tokens=start_tokens,
                        end_token=end_token,
                        initial_state=self.decoder_initial_state,
                        beam_width=self.beam_width,
                        output_layer=self.decoder_output_projection)

                if self.max_decode_step is not None:
                    max_decoder_step = self.max_decode_step
                else:
                    max_decoder_step = tf.round(
                        tf.reduce_max(self.encoder_inputs_length) * 4)

                self.decoder_outputs_decode, self.final_state, _ = seq2seq.dynamic_decode(
                    decoder=inference_decoder,
                    output_time_major=self.time_major,
                    maximum_iterations=max_decoder_step,
                    parallel_iterations=self.parallel_iterations,
                    swap_memory=True,
                    scope=decoder_scope)

                if not self.use_beamsearch_decode:
                    dod = self.decoder_outputs_decode
                    self.decoder_pred_decode = dod.sample_id
                    if self.time_major:
                        self.decoder_pred_decode = tf.transpose(
                            self.decoder_pred_decode, (1, 0))
                else:
                    self.decoder_pred_decode = self.decoder_outputs_decode.predicted_ids

                    if self.time_major:
                        self.decoder_pred_decode = tf.transpose(
                            self.decoder_pred_decode, (1, 0, 2))
                    self.decoder_pred_decode = tf.transpose(
                        self.decoder_pred_decode, perm=[0, 2, 1])
                    dod = self.decoder_outputs_decode
                    self.beam_prob = dod.beam_search_decoder_output.scores
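
Two training-time tricks in the example above are worth isolating: the output projection is applied once to the full rnn_output after dynamic_decode instead of per time step, and sequence_loss is weighted with a sequence_mask so padding positions do not contribute. A minimal sketch with dummy shapes (TensorFlow 1.x with tf.contrib.seq2seq assumed):

import tensorflow as tf
from tensorflow.contrib import seq2seq

batch_size, max_time, rnn_size, vocab_size = 2, 5, 8, 11

# Dummy decoder outputs, targets and lengths
rnn_output = tf.zeros((batch_size, max_time, rnn_size))
targets = tf.zeros((batch_size, max_time), dtype=tf.int32)
lengths = tf.constant([5, 3])

# Project to vocabulary size once, after decoding (the trick described above)
projection = tf.layers.Dense(vocab_size, use_bias=False, name='output_projection')
logits = projection(rnn_output)  # [batch_size, max_time, vocab_size]

# Mask out padding positions so they do not contribute to the loss
masks = tf.sequence_mask(lengths, maxlen=max_time, dtype=tf.float32)
loss = seq2seq.sequence_loss(logits=logits, targets=targets, weights=masks,
                             average_across_timesteps=True,
                             average_across_batch=True)
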
Example #12
    def build_encoder(self):
        """ 构建编码器"""

        with tf.variable_scope('encoder'):  # variable namespace; enables variable sharing
            encoder_cell = self.build_encoder_cell()

            with tf.device(_get_embed_device(
                    self.input_vocab_size)):  # decide whether to place the embedding in GPU or CPU memory
                if self.pretrained_embedding:
                    self.encoder_embeddings = tf.Variable(
                        tf.constant(0.0, shape=(self.input_vocab_size,
                                                self.embedding_size)),
                        trainable=True,
                        name='embeddings')

                    self.encoder_embeddings_placeholder = tf.placeholder(
                        tf.float32,
                        (self.input_vocab_size, self.embedding_size))
                    self.encoder_embeddings_init = self.encoder_embeddings.assign(  # assign op
                        self.encoder_embeddings_placeholder)
                else:
                    self.encoder_embeddings = tf.get_variable(
                        name='embedding',
                        shape=(self.input_vocab_size, self.embedding_size),
                        initializer=self.initializer,
                        dtype=tf.float32)

            # embedding_lookup finds the representation of ids in params:
            # each entry of the 2-D ids indexes a row of the 2-D embeddings,
            # so the result has shape [batch_size, time_step, embedding_size]
            self.encoder_inputs_embedded = tf.nn.embedding_lookup(
                params=self.encoder_embeddings,
                ids=self.encoder_inputs)
            if self.use_residual:
                # fully connected projection layer
                self.encoder_inputs_embedded = layers.dense(
                    self.encoder_inputs_embedded,
                    self.hidden_units,
                    use_bias=False,
                    name='encoder_residual_projection')
            inputs = self.encoder_inputs_embedded
            if self.time_major:
                inputs = tf.transpose(inputs, (1, 0, 2))

            if not self.bidirectional:
                (encoder_outputs, encoder_state) = tf.nn.dynamic_rnn(
                    cell=encoder_cell,
                    inputs=inputs,
                    sequence_length=self.encoder_inputs_length,
                    dtype=tf.float32,
                    time_major=self.time_major,
                    parallel_iterations=self.parallel_iterations,
                    swap_memory=False)
            else:
                encoder_cell_bw = self.build_encoder_cell()
                ((encoder_fw_outputs, encoder_bw_outputs), (encoder_fw_state,
                                                            encoder_bw_state)
                 ) = tf.nn.bidirectional_dynamic_rnn(  # dynamic multi-layer bidirectional RNN
                     cell_fw=encoder_cell,
                     cell_bw=encoder_cell_bw,
                     inputs=inputs,
                     sequence_length=self.encoder_inputs_length,
                     dtype=tf.float32,
                     time_major=self.time_major,
                     parallel_iterations=self.parallel_iterations,
                     swap_memory=True)
                encoder_outputs = tf.concat(
                    [encoder_fw_outputs, encoder_bw_outputs], 2)

                encoder_state = []
                for i in range(self.depth):
                    encoder_state.append(encoder_fw_state[i])
                    encoder_state.append(encoder_bw_state[i])
                encoder_state = tuple(encoder_state)

            return encoder_outputs, encoder_state
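
_get_embed_device appears in every example here but its implementation is not shown. A common way to write such a helper (this is an assumed sketch, not the code behind these examples; the threshold is hypothetical) is to place large embedding matrices on the CPU so they do not exhaust GPU memory:

VOCAB_SIZE_THRESHOLD_CPU = 50000  # assumed threshold, not taken from these examples

def _get_embed_device(vocab_size):
    """Assumed helper: keep large embeddings in host memory, small ones on the GPU."""
    if vocab_size > VOCAB_SIZE_THRESHOLD_CPU:
        return '/cpu:0'
    return '/gpu:0'
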
Example #13
    def build_encoder(self):
        """构建编码器
        """
        # print("构建编码器")
        with tf.variable_scope('encoder'):
            # Build the encoder cell
            self.encoder_cell = self.build_encoder_cell()

            # Encoder embedding
            with tf.device(_get_embed_device(self.input_vocab_size)):
                self.encoder_embeddings = tf.get_variable(
                    name='embedding',
                    shape=(self.input_vocab_size, self.embedding_size),
                    initializer=self.initializer,
                    dtype=tf.float32)

            # Embedded inputs, shape = (batch_size, time_step, embedding_size)
            self.encoder_inputs_embedded = tf.nn.embedding_lookup(
                params=self.encoder_embeddings, ids=self.encoder_inputs)

            # Input projection layer to feed embedded inputs to the cell
            # ** Essential when use_residual=True to match input/output dims
            # (i.e. when residual connections are used, a projection like this may be
            # required to align the input dimension with the cell output dimension)
            input_layer = layers.Dense(self.hidden_units,
                                       dtype=tf.float32,
                                       use_bias=False,
                                       name='input_projection')
            self.input_layer = input_layer

            # Embedded inputs having gone through input projection layer
            self.encoder_inputs_embedded = input_layer(
                self.encoder_inputs_embedded)

            # Encode input sequences into context vectors:
            # encoder_outputs: [batch_size, max_time_step, cell_output_size]
            # encoder_state: [batch_size, cell_output_size]

            inputs = self.encoder_inputs_embedded
            if self.time_major:
                inputs = tf.transpose(inputs, (1, 0, 2))

            if not self.bidirectional:
                (self.encoder_outputs,
                 self.encoder_last_state) = tf.nn.dynamic_rnn(
                     cell=self.encoder_cell,
                     inputs=inputs,
                     sequence_length=self.encoder_inputs_length,
                     dtype=tf.float32,
                     time_major=self.time_major,
                     parallel_iterations=self.parallel_iterations,
                     swap_memory=True)
            else:
                self.encoder_cell_bw = self.build_encoder_cell()
                ((encoder_fw_outputs, encoder_bw_outputs),
                 (encoder_fw_state,
                  encoder_bw_state)) = tf.nn.bidirectional_dynamic_rnn(
                      cell_fw=self.encoder_cell,
                      cell_bw=self.encoder_cell_bw,
                      inputs=inputs,
                      sequence_length=self.encoder_inputs_length,
                      dtype=tf.float32,
                      time_major=self.time_major,
                      parallel_iterations=self.parallel_iterations,
                      swap_memory=True)

                self.encoder_outputs = tf.concat(
                    (encoder_fw_outputs, encoder_bw_outputs), 2)

                # Merge the forward and backward states in the bidirectional case
                # QHD
                # Borrowed from
                # https://github.com/ematvey/tensorflow-seq2seq-tutorials/blob/master/model_new.py
                # with modifications: the original code did not handle multi-layer cells (MultiRNNCell)
                if isinstance(encoder_fw_state[0], LSTMStateTuple):
                    # LSTM cells
                    self.encoder_last_state = tuple([
                        LSTMStateTuple(
                            c=tf.concat(
                                (encoder_fw_state[i].c, encoder_bw_state[i].c),
                                1),
                            h=tf.concat(
                                (encoder_fw_state[i].h, encoder_bw_state[i].h),
                                1)) for i in range(len(encoder_fw_state))
                    ])
                elif isinstance(encoder_fw_state[0], tf.Tensor):
                    # A GRU layer has a single state tensor (tf.Tensor),
                    # so concatenating per layer is enough
                    self.encoder_last_state = tuple([
                        tf.concat((encoder_fw_state[i], encoder_bw_state[i]),
                                  1,
                                  name='bidirectional_concat_{}'.format(i))
                        for i in range(len(encoder_fw_state))
                    ])
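
Finally, the input_projection layer in the example above exists because a residual-wrapped cell adds its input to its output, so the input must already have hidden_units dimensions. A minimal sketch of that constraint, assuming the TensorFlow 1.x tf.nn.rnn_cell and tf.layers APIs and dummy shapes:

import tensorflow as tf

hidden_units = 16

# ResidualWrapper adds the cell input to the cell output, so the projected
# input below must have exactly hidden_units dimensions.
cell = tf.nn.rnn_cell.ResidualWrapper(tf.nn.rnn_cell.GRUCell(hidden_units))

embedded = tf.zeros((4, 10, 300))  # [batch, time, embedding_size]
projected = tf.layers.dense(embedded, hidden_units,
                            use_bias=False, name='input_projection')
outputs, state = tf.nn.dynamic_rnn(cell, projected, dtype=tf.float32)
print(outputs.shape)  # (4, 10, 16)
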