예제 #1
0
    def __init__(self,
                 reversed_dict,
                 article_max_len,
                 summary_max_len,
                 args,
                 forward_only=False):
        self.vocabulary_size = len(reversed_dict)
        self.embedding_size = args.embedding_size
        self.num_hidden = args.num_hidden
        self.num_layers = args.num_layers
        self.learning_rate = args.learning_rate
        self.beam_width = args.beam_width
        if not forward_only:
            self.keep_prob = args.keep_prob
        else:
            self.keep_prob = 1.0
        self.cell = tf.nn.rnn_cell.BasicLSTMCell
        with tf.variable_scope("decoder/projection"):
            self.projection_layer = tf.layers.Dense(self.vocabulary_size,
                                                    use_bias=False)

        self.batch_size = tf.placeholder(tf.int32, (), name="batch_size")
        self.X = tf.placeholder(tf.int32, [None, article_max_len])
        self.X_len = tf.placeholder(tf.int32, [None])
        self.decoder_input = tf.placeholder(tf.int32, [None, summary_max_len])
        self.decoder_len = tf.placeholder(tf.int32, [None])
        self.decoder_target = tf.placeholder(tf.int32, [None, summary_max_len])
        self.global_step = tf.Variable(0, trainable=False)

        with tf.name_scope("embedding"):
            if not forward_only and args.glove:
                init_embeddings = tf.constant(get_init_embedding(
                    reversed_dict, self.embedding_size),
                                              dtype=tf.float32)
            else:
                init_embeddings = tf.random_uniform(
                    [self.vocabulary_size, self.embedding_size], -1.0, 1.0)
            self.embeddings = tf.get_variable("embeddings",
                                              initializer=init_embeddings)
            self.encoder_emb_inp = tf.transpose(tf.nn.embedding_lookup(
                self.embeddings, self.X),
                                                perm=[1, 0, 2])
            self.decoder_emb_inp = tf.transpose(tf.nn.embedding_lookup(
                self.embeddings, self.decoder_input),
                                                perm=[1, 0, 2])

        with tf.name_scope("encoder"):
            fw_cells = [
                self.cell(self.num_hidden) for _ in range(self.num_layers)
            ]
            bw_cells = [
                self.cell(self.num_hidden) for _ in range(self.num_layers)
            ]
            fw_cells = [rnn.DropoutWrapper(cell) for cell in fw_cells]
            bw_cells = [rnn.DropoutWrapper(cell) for cell in bw_cells]

            encoder_outputs, encoder_state_fw, encoder_state_bw = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
                fw_cells,
                bw_cells,
                self.encoder_emb_inp,
                sequence_length=self.X_len,
                time_major=True,
                dtype=tf.float32)
            self.encoder_output = tf.concat(encoder_outputs, 2)
            encoder_state_c = tf.concat(
                (encoder_state_fw[0].c, encoder_state_bw[0].c), 1)
            encoder_state_h = tf.concat(
                (encoder_state_fw[0].h, encoder_state_bw[0].h), 1)
            self.encoder_state = rnn.LSTMStateTuple(c=encoder_state_c,
                                                    h=encoder_state_h)

        with tf.name_scope("decoder"), tf.variable_scope(
                "decoder") as decoder_scope:
            decoder_cell = self.cell(self.num_hidden * 2)

            if not forward_only:
                attention_states = tf.transpose(self.encoder_output, [1, 0, 2])
                attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                    self.num_hidden * 2,
                    attention_states,
                    memory_sequence_length=self.X_len,
                    normalize=True)
                decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
                    decoder_cell,
                    attention_mechanism,
                    attention_layer_size=self.num_hidden * 2)
                initial_state = decoder_cell.zero_state(
                    dtype=tf.float32, batch_size=self.batch_size)
                initial_state = initial_state.clone(
                    cell_state=self.encoder_state)
                helper = tf.contrib.seq2seq.TrainingHelper(
                    self.decoder_emb_inp, self.decoder_len, time_major=True)
                decoder = tf.contrib.seq2seq.BasicDecoder(
                    decoder_cell, helper, initial_state)
                outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                    decoder, output_time_major=True, scope=decoder_scope)
                self.decoder_output = outputs.rnn_output
                self.logits = tf.transpose(self.projection_layer(
                    self.decoder_output),
                                           perm=[1, 0, 2])
                self.logits_reshape = tf.concat([
                    self.logits,
                    tf.zeros([
                        self.batch_size, summary_max_len -
                        tf.shape(self.logits)[1], self.vocabulary_size
                    ])
                ],
                                                axis=1)
            else:
                tiled_encoder_output = tf.contrib.seq2seq.tile_batch(
                    tf.transpose(self.encoder_output, perm=[1, 0, 2]),
                    multiplier=self.beam_width)
                tiled_encoder_final_state = tf.contrib.seq2seq.tile_batch(
                    self.encoder_state, multiplier=self.beam_width)
                tiled_seq_len = tf.contrib.seq2seq.tile_batch(
                    self.X_len, multiplier=self.beam_width)
                attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                    self.num_hidden * 2,
                    tiled_encoder_output,
                    memory_sequence_length=tiled_seq_len,
                    normalize=True)
                decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
                    decoder_cell,
                    attention_mechanism,
                    attention_layer_size=self.num_hidden * 2)
                initial_state = decoder_cell.zero_state(
                    dtype=tf.float32,
                    batch_size=self.batch_size * self.beam_width)
                initial_state = initial_state.clone(
                    cell_state=tiled_encoder_final_state)
                decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                    cell=decoder_cell,
                    embedding=self.embeddings,
                    start_tokens=tf.fill([self.batch_size], tf.constant(2)),
                    end_token=tf.constant(3),
                    initial_state=initial_state,
                    beam_width=self.beam_width,
                    output_layer=self.projection_layer)
                outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                    decoder,
                    output_time_major=True,
                    maximum_iterations=summary_max_len,
                    scope=decoder_scope)
                self.prediction = tf.transpose(outputs.predicted_ids,
                                               perm=[1, 2, 0])

        with tf.name_scope("loss"):
            if not forward_only:
                crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self.logits_reshape, labels=self.decoder_target)
                weights = tf.sequence_mask(self.decoder_len,
                                           summary_max_len,
                                           dtype=tf.float32)
                self.loss = tf.reduce_sum(crossent * weights /
                                          tf.to_float(self.batch_size))

                params = tf.trainable_variables()
                gradients = tf.gradients(self.loss, params)
                clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
                optimizer = tf.train.AdamOptimizer(self.learning_rate)
                self.update = optimizer.apply_gradients(
                    zip(clipped_gradients, params),
                    global_step=self.global_step)
예제 #2
0
    def __init__(self,
                 reversed_dict,
                 article_max_len,
                 summary_max_len,
                 args,
                 forward_only=False):
        self.vocabulary_size = len(reversed_dict)
        self.embedding_size = args.embedding_size
        self.num_hidden = args.num_hidden
        self.num_layers = args.num_layers
        self.learning_rate = args.learning_rate
        self.beam_width = args.beam_width
        self.cell_type = 'lstm'
        if not forward_only:
            self.keep_prob = args.keep_prob
        else:
            self.keep_prob = 1.0
        self.cell = tf.nn.rnn_cell.BasicLSTMCell  # 采用lstm cell 方式。
        with tf.variable_scope("decoder/projection"):
            self.projection_layer = tf.layers.Dense(self.vocabulary_size,
                                                    use_bias=False)

        self.batch_size = tf.placeholder(tf.int32, (), name="batch_size")
        self.X = tf.placeholder(tf.int32,
                                [None, article_max_len])  ##[batch_size,30]
        self.X_len = tf.placeholder(tf.int32, [None])
        self.decoder_input = tf.placeholder(
            tf.int32, [None, summary_max_len])  # [batch_size,15]
        self.decoder_len = tf.placeholder(tf.int32, [None])
        self.decoder_target = tf.placeholder(
            tf.int32, [None, summary_max_len])  # [batch_size,15]
        self.global_step = tf.Variable(0, trainable=False)

        with tf.name_scope("embedding"):
            if not forward_only and args.glove:  # 当训练状态,比并且参数选择了 glove打开状态。
                init_embeddings = tf.constant(get_init_embedding(
                    reversed_dict, self.embedding_size),
                                              dtype=tf.float32)
            else:
                init_embeddings = tf.random_uniform(
                    [self.vocabulary_size, self.embedding_size], -1.0, 1.0)
            self.embeddings = tf.get_variable("embeddings",
                                              initializer=init_embeddings)
            self.encoder_emb_inp = tf.transpose(
                tf.nn.embedding_lookup(self.embeddings, self.X),
                perm=[1, 0, 2])  # (50, batch_size, 300)
            self.decoder_emb_inp = tf.transpose(
                tf.nn.embedding_lookup(self.embeddings, self.decoder_input),
                perm=[1, 0, 2])  # (15, batch_size, 300)

        with tf.name_scope(
                "encoder"
        ):  # 当encoder时, 设置 前项和后项cell的 长度,以及隐藏cell 大小。  设置为 hidden150, 两层。

            #encoder_outputs, encoder_state_fw, encoder_state_bw = self._stack_bi_dynamic_rnn_cell()
            #   输入值,  利用前项和后项cell生成对应的长度的双向lstm,  time_major 是输入的batch_size 对应位置。stack_bidirectional_dynamic_rnn  bidirectional_dynamic_rnn  是两个相似的双心啊lstm
            encoder_outputs, encoder_state = self._bi_dynamic_rnn_cell()
            self.encoder_output = tf.concat(
                encoder_outputs,
                2)  # encoder_outputs: 50,batch,300.   encoder_output
            self.encoder_state = encoder_state

        # 在他程序中 是把 第一层的的 开始状态 当做  decode 的状态信息。 个人对次表示 怀疑。
        with tf.name_scope("decoder"), tf.variable_scope(
                "decoder") as decoder_scope:
            decoder_cell = self.cell(
                self.num_hidden *
                2)  # 从这里可以看出,decode,  只需要单层的解析。 decode_cell 是 300维度

            if not forward_only:
                attention_states = tf.transpose(
                    self.encoder_output, [1, 0, 2])  # batch_size, 50, 300
                attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                    # 初始化attention的状态,隐层cell 300,,encode的所有数据。 每个句子的长度, 是否归一化。
                    self.num_hidden * 2,
                    attention_states,
                    memory_sequence_length=self.X_len,
                    normalize=True)  # 初始化attention的相关参数。
                decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
                    decoder_cell,
                    attention_mechanism,
                    # 单层lstm,attention的方法,attention的输出深度。
                    attention_layer_size=self.num_hidden * 2)
                initial_state = decoder_cell.zero_state(
                    dtype=tf.float32, batch_size=self.batch_size
                )  # 给decode_cell 定义初始化。batch_size 不太懂。  lstmcell, attention,attention, attention,tuple, attention.不过对于如何初始化beam_search 比较特殊。
                initial_state = initial_state.clone(
                    cell_state=self.encoder_state)  # 重新定义它的初始化状态。
                helper = tf.contrib.seq2seq.TrainingHelper(
                    self.decoder_emb_inp, self.decoder_len,
                    time_major=True)  # 输入层定义。  decode的输入值,和它对应的长度,是否是那个奇葩格式。
                decoder = tf.contrib.seq2seq.BasicDecoder(
                    decoder_cell, helper, initial_state
                )  # cell rnn_cell的特殊形式, helper数据输入, initial_state : The initial state of the RNNCell., output_layer=None
                outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                    decoder, output_time_major=True,
                    scope=decoder_scope)  # 真正的执行过程,调用函数运行, 之前都是 初始化某个零件之类的。
                self.decoder_output = outputs.rnn_output
                self.logits = tf.transpose(  # self.decoder_output 的数据为 ( , ,300) 变为 ( ,,17211) 最后生成的句子长度不确定了。
                    self.projection_layer(self.decoder_output),
                    perm=[1, 0, 2])
                self.logits_reshape = tf.concat(  # 两个数据 在第一维度 融合在一起,让长度保持一致。
                    [
                        self.logits,
                        tf.zeros([
                            self.batch_size, summary_max_len -
                            tf.shape(self.logits)[1], self.vocabulary_size
                        ])
                    ],
                    axis=1)
            else:
                tiled_encoder_output = tf.contrib.seq2seq.tile_batch(
                    # encoder_output:(50,batch_size, 300) 返回值为 batch_size*beam_width, 50,300
                    tf.transpose(self.encoder_output, perm=[1, 0, 2]),
                    multiplier=self.beam_width)
                tiled_encoder_final_state = tf.contrib.seq2seq.tile_batch(
                    self.encoder_state,
                    multiplier=self.beam_width)  # (,300) 变为 某个值*10,300
                tiled_seq_len = tf.contrib.seq2seq.tile_batch(
                    self.X_len, multiplier=self.beam_width)  # x的长度 也进行相同的操作。
                attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                    self.num_hidden * 2,
                    tiled_encoder_output,
                    memory_sequence_length=tiled_seq_len,
                    normalize=True)
                decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
                    decoder_cell,
                    attention_mechanism,
                    attention_layer_size=self.num_hidden * 2)
                initial_state = decoder_cell.zero_state(
                    dtype=tf.float32,
                    batch_size=self.batch_size * self.beam_width)
                initial_state = initial_state.clone(
                    cell_state=tiled_encoder_final_state)
                decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                    cell=decoder_cell,
                    embedding=self.embeddings,
                    start_tokens=tf.fill([self.batch_size], tf.constant(2)),
                    end_token=tf.constant(3),
                    initial_state=initial_state,
                    beam_width=self.beam_width,
                    output_layer=self.projection_layer)
                outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                    decoder,
                    output_time_major=True,
                    maximum_iterations=summary_max_len,
                    scope=decoder_scope)
                self.prediction = tf.transpose(outputs.predicted_ids,
                                               perm=[1, 2, 0])

        with tf.name_scope("loss"):
            if not forward_only:
                crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self.logits_reshape, labels=self.decoder_target)
                weights = tf.sequence_mask(self.decoder_len,
                                           summary_max_len,
                                           dtype=tf.float32)
                self.loss = tf.reduce_sum(crossent * weights /
                                          tf.to_float(self.batch_size))

                params = tf.trainable_variables()
                gradients = tf.gradients(self.loss, params)
                clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
                optimizer = tf.train.AdamOptimizer(self.learning_rate)
                self.update = optimizer.apply_gradients(
                    zip(clipped_gradients, params),
                    global_step=self.global_step)