Example #1
def create_attention_mechanism(attention_option, num_units, memory,
                               source_sequence_length):
    """Create attention mechanism based on the attention_option."""
    if attention_option == "luong":
        attention_mechanism = LuongAttention(
            num_units, memory, memory_sequence_length=source_sequence_length)
    elif attention_option == "scaled_luong":
        attention_mechanism = LuongAttention(
            num_units,
            memory,
            memory_sequence_length=source_sequence_length,
            scale=True)
    elif attention_option == "bahdanau":
        attention_mechanism = BahdanauAttention(
            num_units, memory, memory_sequence_length=source_sequence_length)
    elif attention_option == "normed_bahdanau":
        attention_mechanism = BahdanauAttention(
            num_units,
            memory,
            memory_sequence_length=source_sequence_length,
            normalize=True)
    elif attention_option == "multi_head":
        attention_mechanism = MultiHeadAttention(
            num_units,
            memory,
            memory_sequence_length=source_sequence_length,
            num_heads=4)
    else:
        raise ValueError("Unknown attention option %s" % attention_option)

    return attention_mechanism
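
A minimal usage sketch (not part of the example above; encoder_outputs, source_lengths, and decoder_cell are assumed placeholders) showing how the returned mechanism is typically handed to tf.contrib.seq2seq.AttentionWrapper:

# Hedged sketch: wrap a decoder cell with the mechanism built by the helper above.
attention_mechanism = create_attention_mechanism(
    "scaled_luong", num_units=256, memory=encoder_outputs,
    source_sequence_length=source_lengths)
attention_cell = tf.contrib.seq2seq.AttentionWrapper(
    decoder_cell, attention_mechanism, attention_layer_size=256)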
Example #2
    def _create_decoder_cell(self):
        enc_outputs, enc_states, enc_seq_len = self.enc_outputs, self.enc_states, self.enc_seq_len
        batch_size = self.batch_size * self.cfg.beam_size if self.use_beam_search else self.batch_size
        with tf.variable_scope("attention"):
            if self.cfg.attention == "luong":  # Luong attention mechanism
                attention_mechanism = LuongAttention(
                    num_units=self.cfg.num_units,
                    memory=enc_outputs,
                    memory_sequence_length=enc_seq_len)
            else:  # default: Bahdanau attention mechanism
                attention_mechanism = BahdanauAttention(
                    num_units=self.cfg.num_units,
                    memory=enc_outputs,
                    memory_sequence_length=enc_seq_len)

        def cell_input_fn(
            inputs, attention
        ):  # cell input function that keeps the input/output dimensions consistent
            # reference: https://www.tensorflow.org/api_docs/python/tf/contrib/seq2seq/AttentionWrapper
            if not self.cfg.use_attention_input_feeding:
                return inputs
            input_project = tf.layers.Dense(self.cfg.num_units,
                                            dtype=tf.float32,
                                            name='attn_input_feeding')
            return input_project(tf.concat([inputs, attention], axis=-1))

        if self.cfg.top_attention:  # apply attention mechanism only on the top decoder layer
            cells = [
                self._create_rnn_cell() for _ in range(self.cfg.num_layers)
            ]
            cells[-1] = AttentionWrapper(
                cells[-1],
                attention_mechanism=attention_mechanism,
                name="Attention_Wrapper",
                attention_layer_size=self.cfg.num_units,
                initial_cell_state=enc_states[-1],
                cell_input_fn=cell_input_fn)
            initial_state = [state for state in enc_states]
            initial_state[-1] = cells[-1].zero_state(batch_size=batch_size,
                                                     dtype=tf.float32)
            dec_init_states = tuple(initial_state)
            cells = MultiRNNCell(cells)
        else:
            cells = MultiRNNCell(
                [self._create_rnn_cell() for _ in range(self.cfg.num_layers)])
            cells = AttentionWrapper(cells,
                                     attention_mechanism=attention_mechanism,
                                     name="Attention_Wrapper",
                                     attention_layer_size=self.cfg.num_units,
                                     initial_cell_state=enc_states,
                                     cell_input_fn=cell_input_fn)
            dec_init_states = cells.zero_state(
                batch_size=batch_size,
                dtype=tf.float32).clone(cell_state=enc_states)
        return cells, dec_init_states
Example #3
def apply_attention(cell_dec,
                    enc_hidden_states,
                    enc_final_state,
                    input_length,
                    batch_size,
                    attention_probability_fn,
                    dropout,
                    alignment_history=True):

    if attention_probability_fn == 'softmax':
        probability_fn = tf.nn.softmax
        score_mask_value = float('-inf')
    elif attention_probability_fn == 'hardmax':
        probability_fn = tf.contrib.seq2seq.hardmax
        score_mask_value = float('-inf')
    elif attention_probability_fn == 'sparsemax':

        def sparsemax(attentionscores):
            attentionscores = tf.contrib.sparsemax.sparsemax(attentionscores)
            with tf.control_dependencies([
                    tf.assert_non_negative(attentionscores),
                    tf.assert_less_equal(attentionscores, 1., summarize=60)
            ]):
                return tf.identity(attentionscores)

        probability_fn = sparsemax
        # sparsemax does not deal with -inf properly, and has significant numerical stability issues
        # with large numbers (positive or negative)
        score_mask_value = -1e+5
    else:
        raise ValueError("Invalid attention_probability_fn " +
                         str(attention_probability_fn))

    with tf.variable_scope(
            'attention',
            initializer=tf.initializers.identity(dtype=tf.float32)):
        attention = LuongAttention(int(cell_dec.output_size),
                                   enc_hidden_states,
                                   memory_sequence_length=input_length,
                                   probability_fn=probability_fn,
                                   score_mask_value=score_mask_value)
    cell_dec = AttentionWrapper(cell_dec,
                                attention,
                                cell_input_fn=lambda inputs, _: inputs,
                                attention_layer_size=int(cell_dec.output_size),
                                alignment_history=alignment_history,
                                initial_cell_state=enc_final_state)
    enc_final_state = cell_dec.zero_state(batch_size, dtype=tf.float32)

    cell_dec = ActivationWrapper(cell_dec, activation=tf.tanh)
    cell_dec = NotBrokenDropoutWrapper(cell_dec, output_keep_prob=dropout)

    return cell_dec, enc_final_state
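
A hedged sketch of how apply_attention might be called; the encoder tensors, the GRU size, and the availability of the helper wrappers referenced above (ActivationWrapper, NotBrokenDropoutWrapper) are assumptions, not part of the example:

# Assumed inputs: enc_outputs [batch, time, depth], enc_final_state, input_lengths.
cell = tf.contrib.rnn.GRUCell(256)
cell_dec, dec_init_state = apply_attention(
    cell, enc_outputs, enc_final_state, input_lengths, batch_size,
    attention_probability_fn='sparsemax', dropout=0.9)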
Example #4
    def add_decoder_op(self, enc_final_state, enc_hidden_states,
                       output_embed_matrix, training):
        cell_dec = tf.contrib.rnn.MultiRNNCell([
            self.make_rnn_cell(i, True) for i in range(self.config.rnn_layers)
        ])

        encoder_hidden_size = int(enc_hidden_states.get_shape()[-1])
        decoder_hidden_size = int(cell_dec.output_size)

        # if encoder and decoder have different sizes, add a projection layer
        if encoder_hidden_size != decoder_hidden_size:
            assert False, (encoder_hidden_size, decoder_hidden_size)
            with tf.variable_scope('hidden_projection'):
                kernel = tf.get_variable(
                    'kernel', (encoder_hidden_size, decoder_hidden_size),
                    dtype=tf.float32)

                # apply a relu to the projection for good measure
                enc_final_state = nest.map_structure(
                    lambda x: tf.nn.relu(tf.matmul(x, kernel)),
                    enc_final_state)
                enc_hidden_states = tf.nn.relu(
                    tf.tensordot(enc_hidden_states, kernel, [[2], [1]]))
        else:
            # flatten and repack the state
            enc_final_state = nest.pack_sequence_as(
                cell_dec.state_size, nest.flatten(enc_final_state))

        cell_dec = ParentFeedingCellWrapper(cell_dec, enc_final_state)
        if self.config.apply_attention:
            attention = LuongAttention(self.config.decoder_hidden_size,
                                       enc_hidden_states,
                                       self.input_length_placeholder,
                                       probability_fn=tf.nn.softmax)
            cell_dec = AttentionWrapper(
                cell_dec,
                attention,
                cell_input_fn=lambda inputs, _: inputs,
                attention_layer_size=self.config.decoder_hidden_size,
                initial_cell_state=enc_final_state)
            enc_final_state = cell_dec.zero_state(self.batch_size,
                                                  dtype=tf.float32)
        decoder = Seq2SeqDecoder(self.config, self.input_placeholder,
                                 self.input_length_placeholder,
                                 self.output_placeholder,
                                 self.output_length_placeholder,
                                 self.batch_number_placeholder)
        return decoder.decode(cell_dec, enc_final_state,
                              self.config.grammar.output_size,
                              output_embed_matrix, training)
Example #5
    def add_decoder_op(self, enc_final_state, enc_hidden_states, output_embed_matrix, training):
        cell_dec = tf.contrib.rnn.MultiRNNCell([self.make_rnn_cell(i, for_decoder=True) for i in range(self.config.rnn_layers)])

        encoder_hidden_size = int(enc_hidden_states.get_shape()[-1])
        decoder_hidden_size = int(cell_dec.output_size)
        
        # if encoder and decoder have different sizes, add a projection layer
        if encoder_hidden_size != decoder_hidden_size:
            assert False, (encoder_hidden_size, decoder_hidden_size)
            with tf.variable_scope('hidden_projection'):
                kernel = tf.get_variable('kernel', (encoder_hidden_size, decoder_hidden_size), dtype=tf.float32)
            
                # apply a relu to the projection for good measure
                enc_final_state = nest.map_structure(lambda x: tf.nn.relu(tf.matmul(x, kernel)), enc_final_state)
                enc_hidden_states = tf.nn.relu(tf.tensordot(enc_hidden_states, kernel, [[2], [1]]))
        else:
            # flatten and repack the state
            enc_final_state = nest.pack_sequence_as(cell_dec.state_size, nest.flatten(enc_final_state))

        # to use these we need to tile the final encoder state / the memory
        # but that conflicts with our use of cell_dec on untiled inputs for the gold
        #cell_dec = ParentFeedingCellWrapper(cell_dec, tf.contrib.seq2seq.tile_batch(enc_final_state, self.config.beam_size))
        if self.config.apply_attention and False:
            attention = LuongAttention(decoder_hidden_size, enc_hidden_states, self.input_length_placeholder,
                                       probability_fn=tf.nn.softmax)
            cell_dec = AttentionWrapper(cell_dec, attention,
                                        cell_input_fn=lambda inputs, _: inputs,
                                        attention_layer_size=decoder_hidden_size,
                                        initial_cell_state=enc_final_state)
            enc_final_state = cell_dec.zero_state(self.batch_size, dtype=tf.float32)
        
        print('enc_final_state', enc_final_state)
        linear_layer = tf_core_layers.Dense(self.config.output_size)
        go_vector = tf.ones((self.batch_size,), dtype=tf.int32) * self.config.grammar.start
        decoder = BeamSearchOptimizationDecoder(training, cell_dec, output_embed_matrix, go_vector, self.config.grammar.end,
                                                enc_final_state,
                                                beam_width=self.config.beam_size, output_layer=linear_layer,
                                                gold_sequence=self.output_placeholder if training else None,
                                                gold_sequence_length=(self.output_length_placeholder+1) if training else None)
        
        if self.config.use_grammar_constraints:
            raise NotImplementedError("Grammar constraints are not implemented for the beam search yet")
        
        # dynamic_decode craps itself if we pass output_time_major=False, as it tries to transpose
        # the loss vector
        final_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder, output_time_major=True, maximum_iterations=self.config.max_length)
        return final_outputs
Example #6
    def build_decoder_cell(self, encoder_outputs, encoder_state):
        """
        Build the decoder cell.
        :param encoder_outputs:
        :param encoder_state:
        :return:
        """
        encoder_input_length = self.encoder_inputs_length
        batch_size = self.batch_size

        if self.bidirection:
            encoder_state = encoder_state[-self.depth:]

        if self.time_major:
            encoder_outputs = tf.transpose(encoder_outputs, (1, 0, 2))

        if self.use_beamsearch_decode:
            # Tile beam_width copies of the encoder tensors.
            encoder_outputs = seq2seq.tile_batch(
                encoder_outputs, multiplier=self.beam_width
            )
            encoder_state = seq2seq.tile_batch(
                encoder_state, multiplier=self.beam_width
            )
            encoder_input_length = seq2seq.tile_batch(
                self.encoder_inputs_length, multiplier=self.beam_width
            )
            batch_size *= self.beam_width

        if self.attention_type.lower() == 'luong':
            self.attention_mechanism = LuongAttention(
                num_units=self.hidden_size,
                memory=encoder_outputs,
                memory_sequence_length=encoder_input_length
            )
        else:
            self.attention_mechanism = BahdanauAttention(
                num_units=self.hidden_size,
                memory=encoder_outputs,
                memory_sequence_length=encoder_input_length
            )

        cell = MultiRNNCell([
            self.build_single_cell(
                self.hidden_size,
                use_residual=self.use_residual)
            for _ in range(self.depth)
        ])

        alignment_history = (
            self.mode != 'train' and not self.use_beamsearch_decode
        )

        def cell_input_fn(inputs, attention):
            if not self.use_residual:
                return array_ops.concat([inputs, attention], -1)

            attn_projection = layers.Dense(self.hidden_size,
                                           dtype=tf.float32,
                                           use_bias=False,
                                           name='attention_cell_input_fn')
            return attn_projection(array_ops.concat([inputs, attention], -1))

        cell = AttentionWrapper(
                                cell=cell,
                                attention_mechanism=self.attention_mechanism,
                                attention_layer_size=self.hidden_size,
                                alignment_history=alignment_history,
                                cell_input_fn=cell_input_fn,
                                name='Attention_Wrapper'
        )

        decoder_initial_state = cell.zero_state(
            batch_size, tf.float32)

        # Pass the encoder state to the decoder.
        decoder_initial_state = decoder_initial_state.clone(
            cell_state=encoder_state
        )

        return cell, decoder_initial_state
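
A hedged sketch (the decoder inputs, vocabulary size, and maximum length are assumptions) of how the returned cell and initial state would typically drive a training decoder:

# decoder_inputs_embedded: [batch, time, dim]; decoder_lengths: [batch]
helper = seq2seq.TrainingHelper(decoder_inputs_embedded, decoder_lengths)
decoder = seq2seq.BasicDecoder(cell, helper, decoder_initial_state,
                               output_layer=tf.layers.Dense(vocab_size))
outputs, final_state, _ = seq2seq.dynamic_decode(
    decoder, maximum_iterations=max_decode_len)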
Example #7
    def build_decoder_cell(self, encoder_outputs, encoder_state):
        """构建解码器cell"""

        encoder_inputs_length = self.encoder_inputs_length
        batch_size = self.batch_size
        
        # The encoder state can initialize the decoder; with a bidirectional encoder only the last depth layers are needed.
        if self.bidirectional:
            encoder_state = encoder_state[-self.depth:]

        if self.time_major:
            encoder_outputs = tf.transpose(encoder_outputs, (1, 0, 2))

        # When using BeamSearchDecoder, several tensors must be tiled by beam_width.

        if self.use_beamsearch_decode:
            # Tile encoder_outputs by the multiplier.
            encoder_outputs = seq2seq.tile_batch(
                encoder_outputs, multiplier=self.beam_width)
            encoder_state = seq2seq.tile_batch(
                encoder_state, multiplier=self.beam_width)
            encoder_inputs_length = seq2seq.tile_batch(
                self.encoder_inputs_length, multiplier=self.beam_width)
            # With beam search the effective batch size is beam_width times batch_size.
            batch_size *= self.beam_width

        # Two different attention mechanisms follow.
        # https://blog.csdn.net/u010960155/article/details/82853632
        if self.attention_type.lower() == 'luong':
            # 'Luong' style attention: https://arxiv.org/abs/1508.04025
            self.attention_mechanism = LuongAttention(
                num_units=self.hidden_units,
                memory=encoder_outputs,
                memory_sequence_length=encoder_inputs_length
            )
        else: # Default Bahdanau
            # 'Bahdanau' style attention: https://arxiv.org/abs/1409.0473
            self.attention_mechanism = BahdanauAttention(
                num_units=self.hidden_units,
                memory=encoder_outputs,
                memory_sequence_length=encoder_inputs_length
            )

        # Define the multi-layer RNN.
        cell = MultiRNNCell([
            self.build_single_cell(
                self.hidden_units,
                use_residual=self.use_residual
            )
            for _ in range(self.depth)
        ])

        # Keep the attention alignment history only in inference mode without beam search.
        alignment_history = (
            self.mode != 'train' and not self.use_beamsearch_decode
        )

        def cell_input_fn(inputs, attention):
            """根据attn_input_feeding属性来判断是否在attention计算前进行一次投影计算
              如果使用残差网络,需要先进行投影
            """
            # Without residual connections, simply concatenate.
            if not self.use_residual:
                return array_ops.concat([inputs, attention], -1)
            # With residual connections, project first.
            attn_projection = layers.Dense(self.hidden_units,
                                           dtype=tf.float32,
                                           use_bias=False,
                                           name='attention_cell_input_fn')
            return attn_projection(array_ops.concat([inputs, attention], -1))
        # Wrap the base cell with the attention mechanism.
        cell = AttentionWrapper(
            cell=cell,
            attention_mechanism=self.attention_mechanism,  # attention mechanism type
            attention_layer_size=self.hidden_units,  # number of hidden units
            alignment_history=alignment_history,
            cell_input_fn=cell_input_fn,  # how inputs are fed to the cell
            name='Attention_Wrapper')

        # Start from a zero decoder state.
        decoder_initial_state = cell.zero_state(
            batch_size, tf.float32)
        
        
        # Make sure decoder_initial_state and encoder_state have the same dtype.
        # Initialize the decoder with the encoder state.
        decoder_initial_state = decoder_initial_state.clone(
            cell_state=encoder_state)
        
        # Return the decoder cell and its initial state.
        return cell, decoder_initial_state
Example #8
    def initialize(
        self,
        inputs,
        input_lengths,
        num_speakers,
        speaker_id,
        mel_targets=None,
        linear_targets=None,
        loss_coeff=None,
        rnn_decoder_test_mode=False,
        is_randomly_initialized=False,
    ):

        is_training2 = linear_targets is not None  # this also becomes True at test time; is that intended???
        is_training = not rnn_decoder_test_mode

        self.is_randomly_initialized = is_randomly_initialized

        with tf.variable_scope('inference') as scope:
            hp = self._hparams
            batch_size = tf.shape(inputs)[0]

            # Embeddings(256)
            char_embed_table = tf.get_variable(
                'embedding', [len(symbols), hp.embedding_size],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=0.5))

            zero_pad = True
            if zero_pad:  # logic borrowed from a transformer implementation.
                # The <PAD> (index 0) embedding is fixed to zeros and never trained, i.e. the first row of the variable created above is effectively unused.
                char_embed_table = tf.concat(
                    (tf.zeros(shape=[1, hp.embedding_size]),
                     char_embed_table[1:, :]), 0)

            # [N, T_in, embedding_size]
            char_embedded_inputs = tf.nn.embedding_lookup(
                char_embed_table, inputs)

            self.num_speakers = num_speakers
            if self.num_speakers > 1:
                if hp.speaker_embedding_size != 1:  # speaker_embedding_size = f(16)
                    speaker_embed_table = tf.get_variable(
                        'speaker_embedding',
                        [self.num_speakers, hp.speaker_embedding_size],
                        dtype=tf.float32,
                        initializer=tf.truncated_normal_initializer(
                            stddev=0.5))
                    # [N, T_in, speaker_embedding_size]
                    speaker_embed = tf.nn.embedding_lookup(
                        speaker_embed_table, speaker_id)

                if hp.model_type == 'deepvoice':
                    if hp.speaker_embedding_size == 1:
                        before_highway = get_embed(
                            speaker_id, self.num_speakers,
                            hp.enc_prenet_sizes[-1], "before_highway"
                        )  # 'enc_prenet_sizes': [f(256), f(128)]
                        encoder_rnn_init_state = get_embed(
                            speaker_id, self.num_speakers, hp.enc_rnn_size * 2,
                            "encoder_rnn_init_state")

                        attention_rnn_init_state = get_embed(
                            speaker_id, self.num_speakers,
                            hp.attention_state_size,
                            "attention_rnn_init_state")
                        decoder_rnn_init_states = [
                            get_embed(
                                speaker_id, self.num_speakers, hp.dec_rnn_size,
                                "decoder_rnn_init_states{}".format(idx + 1))
                            for idx in range(hp.dec_layer_num)
                        ]
                    else:
                        deep_dense = lambda x, dim: tf.layers.dense(
                            x, dim, activation=tf.nn.softsign
                        )  # softsign: x / (abs(x) + 1)

                        before_highway = deep_dense(speaker_embed,
                                                    hp.enc_prenet_sizes[-1])
                        encoder_rnn_init_state = deep_dense(
                            speaker_embed, hp.enc_rnn_size * 2)

                        attention_rnn_init_state = deep_dense(
                            speaker_embed, hp.attention_state_size)
                        decoder_rnn_init_states = [
                            deep_dense(speaker_embed, hp.dec_rnn_size)
                            for _ in range(hp.dec_layer_num)
                        ]

                    speaker_embed = None  # deepvoice does not use speaker_embed directly
                elif hp.model_type == 'simple':
                    # The 'simple' model feeds speaker_embed into DecoderPrenetWrapper and ConcatOutputAndAttentionWrapper and concatenates it there.
                    before_highway = None
                    encoder_rnn_init_state = None
                    attention_rnn_init_state = None
                    decoder_rnn_init_states = None
                else:
                    raise Exception(
                        " [!] Unkown multi-speaker model type: {}".format(
                            hp.model_type))
            else:
                # single-speaker case (self.num_speakers == 1)
                speaker_embed = None
                before_highway = None
                encoder_rnn_init_state = None  # init state of the bidirectional GRU
                attention_rnn_init_state = None
                decoder_rnn_init_states = None

            ##############
            # Encoder
            ##############

            # [N, T_in, enc_prenet_sizes[-1]]
            prenet_outputs = prenet(
                char_embedded_inputs,
                is_training,
                hp.enc_prenet_sizes,
                hp.dropout_prob,
                scope='prenet'
            )  # 'enc_prenet_sizes': [f(256), f(128)],  dropout_prob = 0.5
            # ==> (N, T_in, 128)

            # enc_rnn_size = 128
            encoder_outputs = cbhg(
                prenet_outputs,
                input_lengths,
                is_training,
                hp.enc_bank_size,
                hp.enc_bank_channel_size,
                hp.enc_maxpool_width,
                hp.enc_highway_depth,
                hp.enc_rnn_size,
                hp.enc_proj_sizes,
                hp.enc_proj_width,
                scope="encoder_cbhg",
                before_highway=before_highway,
                encoder_rnn_init_state=encoder_rnn_init_state)

            ##############
            # Attention
            ##############

            # For manual control of attention
            self.is_manual_attention = tf.placeholder(
                tf.bool,
                shape=(),
                name='is_manual_attention',
            )
            self.manual_alignments = tf.placeholder(
                tf.float32,
                shape=[None, None, None],
                name="manual_alignments",
            )

            # single: attention_size = 128
            if hp.attention_type == 'bah_mon':
                attention_mechanism = BahdanauMonotonicAttention(
                    hp.attention_size,
                    encoder_outputs,
                    memory_sequence_length=input_lengths,
                    normalize=False)
            elif hp.attention_type == 'bah_mon_norm':  # added by hccho
                attention_mechanism = BahdanauMonotonicAttention(
                    hp.attention_size,
                    encoder_outputs,
                    memory_sequence_length=input_lengths,
                    normalize=True)
            elif hp.attention_type == 'loc_sen':  # Location Sensitive Attention
                attention_mechanism = LocationSensitiveAttention(
                    hp.attention_size,
                    encoder_outputs,
                    memory_sequence_length=input_lengths)
            elif hp.attention_type == 'gmm':  # GMM Attention
                attention_mechanism = GmmAttention(
                    hp.attention_size,
                    memory=encoder_outputs,
                    memory_sequence_length=input_lengths)
            elif hp.attention_type == 'bah_mon_norm_hccho':
                attention_mechanism = BahdanauMonotonicAttention_hccho(
                    hp.attention_size, encoder_outputs, normalize=True)
            elif hp.attention_type == 'bah_norm':
                attention_mechanism = BahdanauAttention(
                    hp.attention_size,
                    encoder_outputs,
                    memory_sequence_length=input_lengths,
                    normalize=True)
            elif hp.attention_type == 'luong_scaled':
                attention_mechanism = LuongAttention(
                    hp.attention_size,
                    encoder_outputs,
                    memory_sequence_length=input_lengths,
                    scale=True)
            elif hp.attention_type == 'luong':
                attention_mechanism = LuongAttention(
                    hp.attention_size,
                    encoder_outputs,
                    memory_sequence_length=input_lengths)
            elif hp.attention_type == 'bah':
                attention_mechanism = BahdanauAttention(
                    hp.attention_size,
                    encoder_outputs,
                    memory_sequence_length=input_lengths)
            else:
                raise Exception(" [!] Unkown attention type: {}".format(
                    hp.attention_type))

            # Build an AttentionWrapper from DecoderPrenetWrapper and attention_mechanism.
            # carpedm20 re-implemented AttentionWrapper from the TensorFlow source, whereas Keith Ito used the stock TensorFlow AttentionWrapper.
            attention_cell = AttentionWrapper(
                GRUCell(hp.attention_state_size),
                attention_mechanism,
                self.is_manual_attention,
                self.manual_alignments,
                initial_cell_state=attention_rnn_init_state,
                alignment_history=True,
                output_attention=False
            )  # Note output_attention=False and that attention_layer_size is not set, so attention equals the context vector.

            # attention_state_size = 256
            dec_prenet_outputs = DecoderPrenetWrapper(
                attention_cell, speaker_embed, is_training,
                hp.dec_prenet_sizes,
                hp.dropout_prob)  # dec_prenet_sizes =  [f(256), f(128)]

            # Concatenate attention context vector and RNN cell output into a 512D vector.
            # [N, T_in, attention_size+attention_state_size]

            # From the AttentionWrapperState members (attention, cell_state, ...) that dec_prenet_outputs passes to the next cell, attention and output are concatenated and emitted as the output.
            # Since the output equals the cell_state, this is concat [ output(=cell_state) | attention ]
            concat_cell = ConcatOutputAndAttentionWrapper(
                dec_prenet_outputs, embed_to_concat=speaker_embed
            )  # builds a new output as concat(output, attention, speaker_embed).

            # Decoder (layers specified bottom to top):  dec_rnn_size= 256
            cells = [OutputProjectionWrapper(concat_cell, hp.dec_rnn_size)
                     ]  # OutputProjectionWrapper does not seem to be mentioned in the paper...
            for _ in range(hp.dec_layer_num):  # hp.dec_layer_num = 2
                cells.append(ResidualWrapper(GRUCell(hp.dec_rnn_size)))

            # [N, T_in, 256]
            decoder_cell = MultiRNNCell(cells, state_is_tuple=True)

            # Project onto r mel spectrograms (predict r outputs at each RNN step):
            output_cell = OutputProjectionWrapper(
                decoder_cell, hp.num_mels * hp.reduction_factor
            )  # could this be modified to also emit a stop token, e.g. (hp.num_mels+1) * hp.reduction_factor???
            decoder_init_state = output_cell.zero_state(
                batch_size=batch_size, dtype=tf.float32
            )  # Calling zero_state here also includes the values already supplied to the AttentionWrapper above.

            if hp.model_type == "deepvoice":
                # decoder_init_state[0] : AttentionWrapperState
                # = cell_state + attention + time + alignments + alignment_history
                # decoder_init_state[0][0] = attention_rnn_init_state (already applied via the AttentionWrapper's initial_cell_state)
                decoder_init_state = list(decoder_init_state)

                for idx, cell in enumerate(decoder_rnn_init_states):
                    shape1 = decoder_init_state[idx + 1].get_shape().as_list()
                    shape2 = cell.get_shape().as_list()
                    if shape1 != shape2:
                        raise Exception(
                            " [!] Shape {} and {} should be equal".format(
                                shape1, shape2))
                    decoder_init_state[idx + 1] = cell

                decoder_init_state = tuple(decoder_init_state)

            if is_training2:
                # rnn_decoder_test_mode = True in test mode, False in train mode
                helper = TacoTrainingHelper(
                    inputs, mel_targets, hp.num_mels, hp.reduction_factor,
                    rnn_decoder_test_mode)  # inputs is only used to compute batch_size
            else:
                helper = TacoTestHelper(batch_size, hp.num_mels,
                                        hp.reduction_factor)

            (decoder_outputs, _), final_decoder_state, _ = \
                    tf.contrib.seq2seq.dynamic_decode(BasicDecoder(output_cell, helper, decoder_init_state),maximum_iterations=hp.max_iters)  # max_iters=200

            # [N, T_out, M]
            mel_outputs = tf.reshape(decoder_outputs,
                                     [batch_size, -1, hp.num_mels])

            # Add post-processing CBHG:
            # [N, T_out, 256]
            #post_outputs = post_cbhg(mel_outputs, hp.num_mels, is_training)
            post_outputs = cbhg(mel_outputs,
                                None,
                                is_training,
                                hp.post_bank_size,
                                hp.post_bank_channel_size,
                                hp.post_maxpool_width,
                                hp.post_highway_depth,
                                hp.post_rnn_size,
                                hp.post_proj_sizes,
                                hp.post_proj_width,
                                scope='post_cbhg')

            if speaker_embed is not None and hp.model_type == 'simple':
                expanded_speaker_emb = tf.expand_dims(speaker_embed, [1])
                tiled_speaker_embedding = tf.tile(
                    expanded_speaker_emb, [1, tf.shape(post_outputs)[1], 1])

                # [N, T_out, 256 + alpha]
                post_outputs = tf.concat(
                    [tiled_speaker_embedding, post_outputs], axis=-1)

            linear_outputs = tf.layers.dense(
                post_outputs, hp.num_freq)  # [N, T_out, F(1025)]

            # Grab alignments from the final decoder state:
            # Since the MultiRNNCell has 3 layers, final_decoder_state is a length-3 tuple ==> final_decoder_state[0]
            alignments = tf.transpose(
                final_decoder_state[0].alignment_history.stack(),
                [1, 2, 0
                 ])  # batch_size, text length(encoder), target length(decoder)

            self.inputs = inputs
            self.speaker_id = speaker_id
            self.input_lengths = input_lengths
            self.loss_coeff = loss_coeff
            self.mel_outputs = mel_outputs
            self.linear_outputs = linear_outputs
            self.alignments = alignments
            self.mel_targets = mel_targets
            self.linear_targets = linear_targets
            self.final_decoder_state = final_decoder_state

            log('=' * 40)
            log(' model_type: %s' % hp.model_type)
            log('=' * 40)

            log('Initialized Tacotron model. Dimensions: ')
            log('    embedding:                %d' %
                char_embedded_inputs.shape[-1])
            if speaker_embed is not None:
                log('    speaker embedding:        %d' %
                    speaker_embed.shape[-1])
            else:
                log('    speaker embedding:        None')
            log('    prenet out:               %d' % prenet_outputs.shape[-1])
            log('    encoder out:              %d' % encoder_outputs.shape[-1])
            log('    attention out:            %d' %
                attention_cell.output_size)
            log('    concat attn & out:        %d' % concat_cell.output_size)
            log('    decoder cell out:         %d' % decoder_cell.output_size)
            log('    decoder out (%d frames):  %d' %
                (hp.reduction_factor, decoder_outputs.shape[-1]))
            log('    decoder out (1 frame):    %d' % mel_outputs.shape[-1])
            log('    postnet out:              %d' % post_outputs.shape[-1])
            log('    linear out:               %d' % linear_outputs.shape[-1])
Example #9
    def build_decoder_cell(self, encoder_outputs, encoder_state):
        """构建解码器cell"""

        encoder_inputs_length = self.encoder_inputs_length
        batch_size = self.batch_size

        if self.bidirectional:
            encoder_state = encoder_state[-self.depth:]

        if self.time_major:
            encoder_outputs = tf.transpose(encoder_outputs, (1, 0, 2))

        # When using BeamSearchDecoder, several tensors must be tiled by beam_width.
        # encoder_outputs, encoder_state, encoder_inputs_length
        # need to be tiled so that:
        # [batch_size, .., ..] -> [batch_size x beam_width, .., ..]
        if self.use_beamsearch_decode:
            encoder_outputs = seq2seq.tile_batch(encoder_outputs,
                                                 multiplier=self.beam_width)
            encoder_state = seq2seq.tile_batch(encoder_state,
                                               multiplier=self.beam_width)
            encoder_inputs_length = seq2seq.tile_batch(
                self.encoder_inputs_length, multiplier=self.beam_width)
            # With beam search the effective batch size is beam_width times batch_size.
            batch_size *= self.beam_width

        # Two different attention mechanisms follow.
        if self.attention_type.lower() == 'luong':
            # 'Luong' style attention: https://arxiv.org/abs/1508.04025
            self.attention_mechanism = LuongAttention(
                num_units=self.hidden_units,
                memory=encoder_outputs,
                memory_sequence_length=encoder_inputs_length)
        else:  # Default Bahdanau
            # 'Bahdanau' style attention: https://arxiv.org/abs/1409.0473
            self.attention_mechanism = BahdanauAttention(
                num_units=self.hidden_units,
                memory=encoder_outputs,
                memory_sequence_length=encoder_inputs_length)

        # Building decoder_cell
        cell = MultiRNNCell([
            self.build_single_cell(self.hidden_units,
                                   use_residual=self.use_residual)
            for _ in range(self.depth)
        ])

        # Keep the attention alignment history only in inference mode without beam search.
        alignment_history = (self.mode != 'train'
                             and not self.use_beamsearch_decode)

        def cell_input_fn(inputs, attention):
            """根据attn_input_feeding属性来判断是否在attention计算前进行一次投影计算
            """
            if not self.use_residual:
                return array_ops.concat([inputs, attention], -1)

            attn_projection = layers.Dense(self.hidden_units,
                                           dtype=tf.float32,
                                           use_bias=False,
                                           name='attention_cell_input_fn')
            return attn_projection(array_ops.concat([inputs, attention], -1))

        cell = AttentionWrapper(cell=cell,
                                attention_mechanism=self.attention_mechanism,
                                attention_layer_size=self.hidden_units,
                                alignment_history=alignment_history,
                                cell_input_fn=cell_input_fn,
                                name='Attention_Wrapper')

        # Zero state.
        decoder_initial_state = cell.zero_state(batch_size, tf.float32)

        # Pass the encoder state to the decoder.
        decoder_initial_state = decoder_initial_state.clone(
            cell_state=encoder_state)

        # if self.use_beamsearch_decode:
        #     decoder_initial_state = seq2seq.tile_batch(
        #         decoder_initial_state, multiplier=self.beam_width)

        return cell, decoder_initial_state
Example #10
    def initialize(self,
                   inputs,
                   input_lengths,
                   num_speakers,
                   speaker_id=None,
                   mel_targets=None,
                   linear_targets=None,
                   is_training=False,
                   loss_coeff=None,
                   stop_token_targets=None):

        with tf.variable_scope('Eembedding') as scope:
            hp = self._hparams
            batch_size = tf.shape(inputs)[0]

            # Embeddings(256)
            char_embed_table = tf.get_variable(
                'inputs_embedding', [len(symbols), hp.embedding_size],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=0.5))

            zero_pad = True
            if zero_pad:  # logic borrowed from a transformer implementation.
                # The <PAD> (index 0) embedding is fixed to zeros and never trained, i.e. the first row of the variable created above is effectively unused.
                char_embed_table = tf.concat(
                    (tf.zeros(shape=[1, hp.embedding_size]),
                     char_embed_table[1:, :]), 0)

            # [N, T_in, embedding_size]
            char_embedded_inputs = tf.nn.embedding_lookup(
                char_embed_table, inputs)

            self.num_speakers = num_speakers
            if self.num_speakers > 1:
                speaker_embed_table = tf.get_variable(
                    'speaker_embedding',
                    [self.num_speakers, hp.speaker_embedding_size],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=0.5))
                # [N, T_in, speaker_embedding_size]
                speaker_embed = tf.nn.embedding_lookup(speaker_embed_table,
                                                       speaker_id)

                deep_dense = lambda x, dim, name: tf.layers.dense(
                    x, dim, activation=tf.nn.softsign, name=name
                )  # softsign: x / (abs(x) + 1)

                encoder_rnn_init_state = deep_dense(
                    speaker_embed, hp.encoder_lstm_units * 4,
                    'encoder_init_dense')  # hp.encoder_lstm_units = 256

                decoder_rnn_init_states = [
                    deep_dense(speaker_embed, hp.decoder_lstm_units * 2,
                               'decoder_init_dense_{}'.format(i))
                    for i in range(hp.decoder_layers)
                ]  # hp.decoder_lstm_units = 1024

                speaker_embed = None
            else:
                # single-speaker case (self.num_speakers == 1)
                speaker_embed = None
                encoder_rnn_init_state = None  # init state of the bidirectional GRU
                attention_rnn_init_state = None
                decoder_rnn_init_states = None

        with tf.variable_scope('Encoder') as scope:
            ##############
            # Encoder
            ##############
            x = char_embedded_inputs
            for i in range(hp.enc_conv_num_layers):
                x = tf.layers.conv1d(x,
                                     filters=hp.enc_conv_channels,
                                     kernel_size=hp.enc_conv_kernel_size,
                                     padding='same',
                                     activation=tf.nn.relu,
                                     name='Encoder_{}'.format(i))
                x = tf.layers.batch_normalization(x, training=is_training)
                x = tf.layers.dropout(x,
                                      rate=hp.dropout_prob,
                                      training=is_training,
                                      name='dropout_{}'.format(i))

            if encoder_rnn_init_state is not None:
                initial_state_fw_c, initial_state_fw_h, initial_state_bw_c, initial_state_bw_h = tf.split(
                    encoder_rnn_init_state, 4, 1)
                initial_state_fw = LSTMStateTuple(initial_state_fw_c,
                                                  initial_state_fw_h)
                initial_state_bw = LSTMStateTuple(initial_state_bw_c,
                                                  initial_state_bw_h)
            else:  # single mode
                initial_state_fw, initial_state_bw = None, None

            cell_fw = ZoneoutLSTMCell(
                hp.encoder_lstm_units,
                is_training,
                zoneout_factor_cell=hp.tacotron_zoneout_rate,
                zoneout_factor_output=hp.tacotron_zoneout_rate,
                name='encoder_fw_LSTM')
            cell_bw = ZoneoutLSTMCell(
                hp.encoder_lstm_units,
                is_training,
                zoneout_factor_cell=hp.tacotron_zoneout_rate,
                zoneout_factor_output=hp.tacotron_zoneout_rate,
                name='encoder_bw_LSTM')
            encoder_conv_output = x
            outputs, states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw,
                cell_bw,
                encoder_conv_output,
                sequence_length=input_lengths,
                initial_state_fw=initial_state_fw,
                initial_state_bw=initial_state_bw,
                dtype=tf.float32)

            # encoder_outputs = [N,T,2*encoder_lstm_units] = [N,T,512]
            encoder_outputs = tf.concat(
                outputs,
                axis=2)  # Concat and return forward + backward outputs

        with tf.variable_scope('Decoder') as scope:

            ##############
            # Attention
            ##############
            if hp.attention_type == 'bah_mon':
                attention_mechanism = BahdanauMonotonicAttention(
                    hp.attention_size,
                    encoder_outputs,
                    memory_sequence_length=input_lengths,
                    normalize=False)
            elif hp.attention_type == 'bah_mon_norm':  # added by hccho
                attention_mechanism = BahdanauMonotonicAttention(
                    hp.attention_size,
                    encoder_outputs,
                    memory_sequence_length=input_lengths,
                    normalize=True)
            elif hp.attention_type == 'loc_sen':  # Location Sensitive Attention
                attention_mechanism = LocationSensitiveAttention(
                    hp.attention_size,
                    encoder_outputs,
                    hparams=hp,
                    is_training=is_training,
                    mask_encoder=hp.mask_encoder,
                    memory_sequence_length=input_lengths,
                    smoothing=hp.smoothing,
                    cumulate_weights=hp.cumulative_weights)
            elif hp.attention_type == 'gmm':  # GMM Attention
                attention_mechanism = GmmAttention(
                    hp.attention_size,
                    memory=encoder_outputs,
                    memory_sequence_length=input_lengths)
            elif hp.attention_type == 'bah_norm':
                attention_mechanism = BahdanauAttention(
                    hp.attention_size,
                    encoder_outputs,
                    memory_sequence_length=input_lengths,
                    normalize=True)
            elif hp.attention_type == 'luong_scaled':
                attention_mechanism = LuongAttention(
                    hp.attention_size,
                    encoder_outputs,
                    memory_sequence_length=input_lengths,
                    scale=True)
            elif hp.attention_type == 'luong':
                attention_mechanism = LuongAttention(
                    hp.attention_size,
                    encoder_outputs,
                    memory_sequence_length=input_lengths)
            elif hp.attention_type == 'bah':
                attention_mechanism = BahdanauAttention(
                    hp.attention_size,
                    encoder_outputs,
                    memory_sequence_length=input_lengths)
            else:
                raise Exception(" [!] Unkown attention type: {}".format(
                    hp.attention_type))

            decoder_lstm = [
                ZoneoutLSTMCell(hp.decoder_lstm_units,
                                is_training,
                                zoneout_factor_cell=hp.tacotron_zoneout_rate,
                                zoneout_factor_output=hp.tacotron_zoneout_rate,
                                name='decoder_LSTM_{}'.format(i + 1))
                for i in range(hp.decoder_layers)
            ]

            decoder_lstm = tf.contrib.rnn.MultiRNNCell(decoder_lstm,
                                                       state_is_tuple=True)
            decoder_init_state = decoder_lstm.zero_state(
                batch_size=batch_size, dtype=tf.float32
            )  # Calling zero_state here also includes the values already supplied to the AttentionWrapper above.

            if hp.model_type == "multi-speaker":

                decoder_init_state = list(decoder_init_state)

                for idx, cell in enumerate(decoder_rnn_init_states):
                    shape1 = decoder_init_state[idx][0].get_shape().as_list()
                    shape2 = cell.get_shape().as_list()
                    if shape1[1] * 2 != shape2[1]:
                        raise Exception(
                            " [!] Shape {} and {} should be equal".format(
                                shape1, shape2))
                    c, h = tf.split(cell, 2, 1)
                    decoder_init_state[idx] = LSTMStateTuple(c, h)

                decoder_init_state = tuple(decoder_init_state)

            attention_cell = AttentionWrapper(
                decoder_lstm,
                attention_mechanism,
                initial_cell_state=decoder_init_state,
                alignment_history=True,
                output_attention=False
            )  # Note output_attention=False and that attention_layer_size is not set, so attention equals the context vector.

            # attention_state_size = 256
            # Decoder input -> prenet -> decoder_lstm -> concat[output, attention]
            dec_prenet_outputs = DecoderWrapper(attention_cell, is_training,
                                                hp.dec_prenet_sizes,
                                                hp.dropout_prob,
                                                hp.inference_prenet_dropout)

            dec_outputs_cell = OutputProjectionWrapper(
                dec_prenet_outputs, (hp.num_mels + 1) * hp.reduction_factor)

            if is_training:
                helper = TacoTrainingHelper(
                    mel_targets, hp.num_mels,
                    hp.reduction_factor)  # inputs is only used to compute batch_size
            else:
                helper = TacoTestHelper(batch_size, hp.num_mels,
                                        hp.reduction_factor)

            decoder_init_state = dec_outputs_cell.zero_state(
                batch_size=batch_size, dtype=tf.float32)
            (decoder_outputs, _), final_decoder_state, _ = \
                    tf.contrib.seq2seq.dynamic_decode(BasicDecoder(dec_outputs_cell, helper, decoder_init_state),maximum_iterations=int(hp.max_n_frame/hp.reduction_factor))  # max_iters=200

            decoder_mel_outputs = tf.reshape(
                decoder_outputs[:, :, :hp.num_mels * hp.reduction_factor],
                [batch_size, -1, hp.num_mels
                 ])  # [N,iters,400] -> [N,5*iters,80]
            stop_token_outputs = tf.reshape(
                decoder_outputs[:, :, hp.num_mels * hp.reduction_factor:],
                [batch_size, -1])  # [N,iters]

            # Postnet
            x = decoder_mel_outputs
            for i in range(hp.postnet_num_layers):
                activation = tf.nn.tanh if i != (hp.postnet_num_layers -
                                                 1) else None
                x = tf.layers.conv1d(x,
                                     filters=hp.postnet_channels,
                                     kernel_size=hp.postnet_kernel_size,
                                     padding='same',
                                     activation=activation,
                                     name='Postnet_{}'.format(i))
                x = tf.layers.batch_normalization(x, training=is_training)
                x = tf.layers.dropout(x,
                                      rate=hp.dropout_prob,
                                      training=is_training,
                                      name='Postnet_dropout_{}'.format(i))

            residual = tf.layers.dense(x,
                                       hp.num_mels,
                                       name='residual_projection')
            mel_outputs = decoder_mel_outputs + residual

            # Add post-processing CBHG:
            # mel_outputs: (N,T,num_mels)
            post_outputs = cbhg(mel_outputs,
                                None,
                                is_training,
                                hp.post_bank_size,
                                hp.post_bank_channel_size,
                                hp.post_maxpool_width,
                                hp.post_highway_depth,
                                hp.post_rnn_size,
                                hp.post_proj_sizes,
                                hp.post_proj_width,
                                scope='post_cbhg')

            linear_outputs = tf.layers.dense(
                post_outputs, hp.num_freq,
                name='linear_spectogram_projection')  # [N, T_out, F(1025)]

            # Grab alignments from the final decoder state:
            alignments = tf.transpose(
                final_decoder_state.alignment_history.stack(),
                [1, 2, 0
                 ])  # batch_size, text length(encoder), target length(decoder)

            self.inputs = inputs
            self.speaker_id = speaker_id
            self.input_lengths = input_lengths
            self.loss_coeff = loss_coeff
            self.decoder_mel_outputs = decoder_mel_outputs
            self.mel_outputs = mel_outputs
            self.linear_outputs = linear_outputs
            self.alignments = alignments
            self.mel_targets = mel_targets
            self.linear_targets = linear_targets
            self.final_decoder_state = final_decoder_state
            self.stop_token_targets = stop_token_targets
            self.stop_token_outputs = stop_token_outputs
            self.all_vars = tf.trainable_variables()
            log('=' * 40)
            log(' model_type: %s' % hp.model_type)
            log('=' * 40)

            log('Initialized Tacotron model. Dimensions: ')
            log('    embedding:                %d' %
                char_embedded_inputs.shape[-1])
            log('    encoder conv out:               %d' %
                encoder_conv_output.shape[-1])
            log('    encoder out:              %d' % encoder_outputs.shape[-1])
            log('    attention out:            %d' %
                attention_cell.output_size)
            log('    decoder prenet lstm concat out :        %d' %
                dec_prenet_outputs.output_size)
            log('    decoder cell out:         %d' %
                dec_outputs_cell.output_size)
            log('    decoder out (%d frames):  %d' %
                (hp.reduction_factor, decoder_outputs.shape[-1]))
            log('    decoder mel out:    %d' % decoder_mel_outputs.shape[-1])
            log('    mel out:    %d' % mel_outputs.shape[-1])
            log('    postnet out:              %d' % post_outputs.shape[-1])
            log('    linear out:               %d' % linear_outputs.shape[-1])
            log('  Tacotron Parameters       {:.3f} Million.'.format(
                np.sum(
                    [np.prod(v.get_shape().as_list())
                     for v in self.all_vars]) / 1000000))
Example #11
    def add_decoder_op(self, enc_final_state, enc_hidden_states,
                       output_embed_matrix, training):
        original_enc_final_state = enc_final_state
        flat_enc_final_state = nest.flatten(enc_final_state)
        enc_final_state = tf.concat(flat_enc_final_state, axis=1)
        enc_final_size = int(enc_final_state.get_shape()[1])

        part_logit_preds = dict()
        part_token_preds = dict()
        part_logit_sequence_preds = dict()
        part_token_sequence_preds = dict()
        part_layers = []
        grammar = self.config.grammar
        for i, part in enumerate(('trigger', 'query', 'action')):
            with tf.variable_scope('decode_function_' + part):
                activation = getattr(
                    tf.nn, self.config.function_nonlinearity) if hasattr(
                        tf.nn, self.config.function_nonlinearity) else getattr(
                            tf, self.config.function_nonlinearity)
                layer = tf.contrib.layers.fully_connected(
                    enc_final_state,
                    self.config.function_hidden_size,
                    activation_fn=activation)
                part_layers.append(layer)
                layer_with_dropout = tf.nn.dropout(
                    layer, keep_prob=self.dropout_placeholder, seed=443 * i)
                part_logit_preds[part] = tf.layers.dense(
                    layer_with_dropout, len(grammar.functions[part]))
                part_token_preds[part] = tf.cast(tf.argmax(
                    part_logit_preds[part], axis=1),
                                                 dtype=tf.int32)

        first_value_token = grammar.num_functions + grammar.num_begin_tokens + grammar.num_control_tokens
        num_value_tokens = grammar.output_size - first_value_token
        output_embed_matrix = tf.concat(
            (output_embed_matrix[0:grammar.num_control_tokens],
             output_embed_matrix[first_value_token:]),
            axis=0)

        adjusted_trigger = part_token_preds['trigger'] + (
            grammar.num_control_tokens + grammar.num_begin_tokens)
        adjusted_query = part_token_preds['query'] + (
            grammar.num_control_tokens + grammar.num_begin_tokens +
            len(grammar.functions['trigger']))
        adjusted_action = part_token_preds['action'] + (
            grammar.num_control_tokens + grammar.num_begin_tokens +
            len(grammar.functions['trigger']) +
            len(grammar.functions['query']))

        layer_concat = tf.concat(part_layers, axis=1)
        for i, part in enumerate(('trigger', 'query', 'action')):
            with tf.variable_scope('decode_sequence_' + part):

                def one_decoder_input(i, like):
                    with tf.variable_scope(str(i)):
                        return tf.layers.dense(layer_concat,
                                               like.get_shape()[1])

                flat_decoder_initial_state = [
                    one_decoder_input(i, like)
                    for i, like in enumerate(flat_enc_final_state)
                ]
                decoder_initial_state = nest.pack_sequence_as(
                    original_enc_final_state, flat_decoder_initial_state)
                cell_dec = tf.contrib.rnn.MultiRNNCell([
                    self.make_rnn_cell(i, True)
                    for i in range(self.config.rnn_layers)
                ])

                # uncompress function tokens (to look them up in the grammar)
                if training:
                    adjusted_function_token = self.part_function_placeholders[
                        part]
                else:
                    if part == 'trigger':
                        adjusted_function_token = adjusted_trigger
                    elif part == 'query':
                        adjusted_function_token = adjusted_query
                    elif part == 'action':
                        adjusted_function_token = adjusted_action

                # adjust the sequence to "skip" function tokens
                output_size = grammar.num_control_tokens + num_value_tokens
                output = self.part_sequence_placeholders[part]
                adjusted_output = tf.where(
                    output >= grammar.num_control_tokens,
                    output - (first_value_token - grammar.num_control_tokens),
                    output)

                if self.config.apply_attention:
                    attention = LuongAttention(self.config.decoder_hidden_size,
                                               enc_hidden_states,
                                               self.input_length_placeholder,
                                               probability_fn=tf.nn.softmax)
                    cell_dec = AttentionWrapper(
                        cell_dec,
                        attention,
                        cell_input_fn=lambda inputs, _: inputs,
                        attention_layer_size=self.config.decoder_hidden_size,
                        initial_cell_state=decoder_initial_state)
                    decoder_initial_state = cell_dec.zero_state(
                        self.batch_size, dtype=tf.float32)
                decoder = Seq2SeqDecoder(
                    self.config,
                    self.input_placeholder,
                    self.input_length_placeholder,
                    adjusted_output,
                    self.part_sequence_length_placeholders[part],
                    self.batch_number_placeholder,
                    max_length=MAX_PRIMITIVE_LENGTH)
                rnn_output, sample_ids = decoder.decode(
                    cell_dec,
                    decoder_initial_state,
                    output_size,
                    output_embed_matrix,
                    training,
                    grammar_helper=PrimitiveSequenceGrammarHelper(
                        grammar, adjusted_function_token))
                part_logit_sequence_preds[part] = rnn_output
                part_token_sequence_preds[part] = tf.cast(sample_ids,
                                                          dtype=tf.int32)

        with tf.variable_scope('top_classifier'):
            top_hidden = tf.contrib.layers.fully_connected(
                enc_final_state,
                self.config.first_token_hidden_size,
                activation_fn=tf.tanh)
            top_hidden_with_dropout = tf.nn.dropout(
                top_hidden, keep_prob=self.dropout_placeholder, seed=127)
            top_logits = tf.layers.dense(top_hidden_with_dropout,
                                         grammar.num_begin_tokens)
            top_token = tf.cast(tf.argmax(top_logits, axis=1), dtype=tf.int32)

        with tf.variable_scope('decode_special'):
            output_size = grammar.num_control_tokens + num_value_tokens
            output = self.special_label_placeholder
            adjusted_output = tf.where(
                output >= grammar.num_control_tokens,
                output - (first_value_token - grammar.num_control_tokens),
                output)
            cell_dec = tf.contrib.rnn.MultiRNNCell([
                self.make_rnn_cell(i, True)
                for i in range(self.config.rnn_layers)
            ])

            sequence_length = tf.ones(
                (self.batch_size, ), dtype=tf.int32) * MAX_SPECIAL_LENGTH
            decoder_initial_state = original_enc_final_state
            if self.config.apply_attention:
                attention = LuongAttention(self.config.decoder_hidden_size,
                                           enc_hidden_states,
                                           self.input_length_placeholder,
                                           probability_fn=tf.nn.softmax)
                cell_dec = AttentionWrapper(
                    cell_dec,
                    attention,
                    cell_input_fn=lambda inputs, _: inputs,
                    attention_layer_size=self.config.decoder_hidden_size,
                    initial_cell_state=original_enc_final_state)
                decoder_initial_state = cell_dec.zero_state(self.batch_size,
                                                            dtype=tf.float32)
            decoder = Seq2SeqDecoder(self.config,
                                     self.input_placeholder,
                                     self.input_length_placeholder,
                                     adjusted_output,
                                     sequence_length,
                                     self.batch_number_placeholder,
                                     max_length=MAX_SPECIAL_LENGTH)
            rnn_output, sample_ids = decoder.decode(
                cell_dec,
                decoder_initial_state,
                output_size,
                output_embed_matrix,
                training,
                grammar_helper=SpecialSequenceGrammarHelper(grammar))
            logit_special_sequence = rnn_output
            token_special_sequence = tf.cast(sample_ids, dtype=tf.int32)

        # adjust tokens back to their output code
        adjusted_top = tf.expand_dims(top_token + grammar.num_control_tokens,
                                      axis=1)

        adjusted_special_sequence = tf.where(
            token_special_sequence >= grammar.num_control_tokens,
            token_special_sequence +
            (first_value_token - grammar.num_control_tokens),
            token_special_sequence)

        adjusted_token_sequences = dict()
        for part in ('trigger', 'query', 'action'):
            token_sequence = part_token_sequence_preds[part]
            adjusted_token_sequence = tf.where(
                token_sequence >= grammar.num_control_tokens, token_sequence +
                (first_value_token - grammar.num_control_tokens),
                token_sequence)
            adjusted_token_sequences[part] = adjusted_token_sequence
        # remove EOS from the middle of the sentence
        adjusted_token_sequences['trigger'] = tf.where(
            tf.equal(adjusted_token_sequences['trigger'], grammar.end),
            tf.zeros_like(adjusted_token_sequences['trigger']),
            adjusted_token_sequences['trigger'])
        adjusted_token_sequences['query'] = tf.where(
            tf.equal(adjusted_token_sequences['query'], grammar.end),
            tf.zeros_like(adjusted_token_sequences['query']),
            adjusted_token_sequences['query'])

        adjusted_trigger = tf.expand_dims(adjusted_trigger, axis=1)
        adjusted_query = tf.expand_dims(adjusted_query, axis=1)
        adjusted_action = tf.expand_dims(adjusted_action, axis=1)

        program_sequence = tf.concat(
            (adjusted_top, adjusted_trigger,
             adjusted_token_sequences['trigger'], adjusted_query,
             adjusted_token_sequences['query'], adjusted_action,
             adjusted_token_sequences['action']),
            axis=1)
        full_special_sequence = tf.concat(
            (adjusted_top, adjusted_special_sequence), axis=1)
        # full special sequence is smaller than program sequence, so we need to pad it all the way to the same shape
        full_special_sequence = pad_up_to(full_special_sequence,
                                          tf.shape(program_sequence)[1],
                                          rank=1)

        rule_token = grammar.dictionary['rule'] - grammar.num_control_tokens
        full_sequence = tf.where(tf.equal(top_token, rule_token),
                                 program_sequence, full_special_sequence)

        return ThreePartAlignerResult(top_logits, part_logit_preds,
                                      part_logit_sequence_preds,
                                      logit_special_sequence, full_sequence)
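
A note on the two tf.where remappings above: the output embedding matrix drops the function-token range, so sequence labels are shifted down (to "skip" the function tokens) before decoding and shifted back up afterwards. A minimal sketch of that remapping; num_control_tokens and first_value_token below are stand-in values, not the grammar's real ones:

import tensorflow as tf

num_control_tokens = 3   # e.g. PAD / EOS / GO (stand-in)
first_value_token = 50   # first token id after all function tokens (stand-in)

output = tf.constant([1, 2, 60, 75, 0])  # ids in the full output code

# shift down so the ids index the compressed embedding matrix
adjusted = tf.where(output >= num_control_tokens,
                    output - (first_value_token - num_control_tokens),
                    output)

# after decoding, shift predictions back to the original output code
restored = tf.where(adjusted >= num_control_tokens,
                    adjusted + (first_value_token - num_control_tokens),
                    adjusted)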
Example #12
    def initialize(self,
                   inputs,
                   input_lengths,
                   mel_targets=None,
                   linear_targets=None,
                   stop_token_targets=None):
        '''Initializes the model for inference.

        Sets "mel_outputs", "linear_outputs", and "alignments" fields.

        Args:
          inputs: int32 Tensor with shape [N, T_in] where N is batch size, T_in is number of
            steps in the input time series, and values are character IDs
          input_lengths: int32 Tensor with shape [N] where N is batch size and values are the lengths
            of each sequence in inputs.
          mel_targets: float32 Tensor with shape [N, T_out, M] where N is batch size, T_out is number
            of steps in the output time series, M is num_mels, and values are entries in the mel
            spectrogram. Only needed for training.
          linear_targets: float32 Tensor with shape [N, T_out, F] where N is batch_size, T_out is number
            of steps in the output time series, F is num_freq, and values are entries in the linear
            spectrogram. Only needed for training.
        '''
        with tf.variable_scope('inference') as scope:
            is_training = linear_targets is not None
            batch_size = tf.shape(inputs)[0]
            hp = self._hparams

            # Embeddings
            embedding_table = tf.get_variable(
                'embedding', [len(symbols), hp.embed_depth],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=0.5))

            embedded_inputs = tf.nn.embedding_lookup(
                embedding_table, inputs)  # [N, T_in, embed_depth=256]

        with tf.variable_scope('Encoder') as scope:

            x = embedded_inputs

            #3 Conv Layers
            for i in range(3):
                x = tf.layers.conv1d(x,
                                     filters=512,
                                     kernel_size=5,
                                     padding='same',
                                     activation=tf.nn.relu,
                                     name='Encoder_{}'.format(i))
                x = tf.layers.batch_normalization(x, training=is_training)
                x = tf.layers.dropout(x,
                                      rate=0.5,
                                      training=is_training,
                                      name='dropout_{}'.format(i))
            encoder_conv_output = x

            #bi-directional LSTM
            cell_fw = ZoneoutLSTMCell(256,
                                      is_training,
                                      zoneout_factor_cell=0.1,
                                      zoneout_factor_output=0.1,
                                      name='encoder_fw_LSTM')
            cell_bw = ZoneoutLSTMCell(256,
                                      is_training,
                                      zoneout_factor_cell=0.1,
                                      zoneout_factor_output=0.1,
                                      name='encoder_bw_LSTM')

            outputs, states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw,
                cell_bw,
                encoder_conv_output,
                sequence_length=input_lengths,
                dtype=tf.float32)

            # encoder_outputs = [N,T,2*encoder_lstm_units] = [N,T,512]
            encoder_outputs = tf.concat(
                outputs,
                axis=2)  # Concat and return forward + backward outputs

        with tf.variable_scope('Decoder') as scope:

            if hp.attention_type == 'loc_sen':  # Location Sensitivity Attention
                attention_mechanism = LocationSensitiveAttention(
                    128,
                    encoder_outputs,
                    hparams=hp,
                    is_training=is_training,
                    mask_encoder=True,
                    memory_sequence_length=input_lengths,
                    smoothing=False,
                    cumulate_weights=True)
            elif hp.attention_type == 'gmm':  # GMM Attention
                attention_mechanism = GmmAttention(
                    128,
                    memory=encoder_outputs,
                    memory_sequence_length=input_lengths)
            elif hp.attention_type == 'step_bah':
                attention_mechanism = BahdanauStepwiseMonotonicAttention(
                    128,
                    encoder_outputs,
                    memory_sequence_length=input_lengths,
                    mode="parallel")
            elif hp.attention_type == 'mon_bah':
                attention_mechanism = BahdanauMonotonicAttention(
                    128,
                    encoder_outputs,
                    memory_sequence_length=input_lengths,
                    normalize=True)
            elif hp.attention_type == 'loung':
                attention_mechanism = LuongAttention(
                    128, encoder_outputs, memory_sequence_length=input_lengths)

            # attention_mechanism = LocationSensitiveAttention(128, encoder_outputs, hparams=hp, is_training=is_training, mask_encoder=True, memory_sequence_length = input_lengths, smoothing=False, cumulate_weights=True)
            #mask_encoder: whether to mask encoder padding while computing location sensitive attention. Set to True for better prosody but slower convergence.
            #cumulate_weights: Whether to cumulate (sum) all previous attention weights or simply feed previous weights (Recommended: True)

            decoder_lstm = [
                ZoneoutLSTMCell(1024,
                                is_training,
                                zoneout_factor_cell=0.1,
                                zoneout_factor_output=0.1,
                                name='decoder_LSTM_{}'.format(i + 1))
                for i in range(2)
            ]

            decoder_lstm = tf.contrib.rnn.MultiRNNCell(decoder_lstm,
                                                       state_is_tuple=True)
            # decoder_init_state = decoder_lstm.zero_state(batch_size=batch_size, dtype=tf.float32)  # not available in TensorFlow 1

            attention_cell = AttentionWrapper(decoder_lstm,
                                              attention_mechanism,
                                              alignment_history=True,
                                              output_attention=False)

            # attention_state_size = 256
            # Decoder input -> prenet -> decoder_lstm -> concat[output, attention]
            dec_outputs = DecoderPrenetWrapper(attention_cell, is_training,
                                               hp.prenet_depths)
            dec_outputs_cell = OutputProjectionWrapper(
                dec_outputs, (hp.num_mels) * hp.outputs_per_step)

            if is_training:
                helper = TacoTrainingHelper(inputs, mel_targets, hp.num_mels,
                                            hp.outputs_per_step)
            else:
                helper = TacoTestHelper(batch_size, hp.num_mels,
                                        hp.outputs_per_step)

            decoder_init_state = dec_outputs_cell.zero_state(
                batch_size=batch_size, dtype=tf.float32)
            (decoder_outputs,
             _), final_decoder_state, _ = tf.contrib.seq2seq.dynamic_decode(
                 BasicDecoder(dec_outputs_cell, helper, decoder_init_state),
                 maximum_iterations=hp.max_iters)  # [N, T_out/r, M*r]

            # Reshape outputs to be one output per entry
            decoder_mel_outputs = tf.reshape(
                decoder_outputs[:, :, :hp.num_mels * hp.outputs_per_step],
                [batch_size, -1, hp.num_mels])  # [N, T_out, M]
            #stop_token_outputs = tf.reshape(decoder_outputs[:,:,hp.num_mels * hp.outputs_per_step:], [batch_size, -1]) # [N,iters]

            # Postnet
            x = decoder_mel_outputs
            for i in range(5):
                activation = tf.nn.tanh if i != (4) else None
                x = tf.layers.conv1d(x,
                                     filters=512,
                                     kernel_size=5,
                                     padding='same',
                                     activation=activation,
                                     name='Postnet_{}'.format(i))
                x = tf.layers.batch_normalization(x, training=is_training)
                x = tf.layers.dropout(x,
                                      rate=0.5,
                                      training=is_training,
                                      name='Postnet_dropout_{}'.format(i))

            residual = tf.layers.dense(x,
                                       hp.num_mels,
                                       name='residual_projection')
            mel_outputs = decoder_mel_outputs + residual

            # Add post-processing CBHG:
            # mel_outputs: (N,T,num_mels)
            post_outputs = post_cbhg(mel_outputs, hp.num_mels, is_training,
                                     hp.postnet_depth)
            linear_outputs = tf.layers.dense(
                post_outputs, hp.num_freq)  # [N, T_out, F(1025)]

            # Grab alignments from the final decoder state:
            alignments = tf.transpose(
                final_decoder_state.alignment_history.stack(),
                [1, 2, 0
                 ])  # batch_size, text length(encoder), target length(decoder)

            self.inputs = inputs
            self.input_lengths = input_lengths
            self.decoder_mel_outputs = decoder_mel_outputs
            self.mel_outputs = mel_outputs
            self.linear_outputs = linear_outputs
            self.alignments = alignments
            self.mel_targets = mel_targets
            self.linear_targets = linear_targets
            #self.stop_token_targets = stop_token_targets
            #self.stop_token_outputs = stop_token_outputs
            self.all_vars = tf.trainable_variables()
            log('Initialized Tacotron model. Dimensions: ')
            log('  embedding:               %d' % embedded_inputs.shape[-1])
            # log('  prenet out:              %d' % prenet_outputs.shape[-1])
            log('  encoder out:             %d' % encoder_outputs.shape[-1])
            log('  attention out:           %d' % attention_cell.output_size)
            #log('  concat attn & out:       %d' % concat_cell.output_size)
            log('  decoder cell out:        %d' % dec_outputs_cell.output_size)
            log('  decoder out (%d frames):  %d' %
                (hp.outputs_per_step, decoder_outputs.shape[-1]))
            log('  decoder out (1 frame):   %d' % mel_outputs.shape[-1])
            log('  postnet out:             %d' % post_outputs.shape[-1])
            log('  linear out:              %d' % linear_outputs.shape[-1])
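
A note on the decoder reshape above: the decoder emits hp.outputs_per_step mel frames per step, so its raw output has shape [N, T_out/r, M*r] and is unpacked back to [N, T_out, M]. A small sketch of just that reshape, with stand-in values for the reduction factor and mel dimension:

import tensorflow as tf

num_mels = 80          # M (stand-in)
outputs_per_step = 5   # r, the reduction factor (stand-in)

# decoder output with r mel frames packed into each decoder step
decoder_outputs = tf.placeholder(
    tf.float32, [None, None, num_mels * outputs_per_step])  # [N, T_out/r, M*r]

batch_size = tf.shape(decoder_outputs)[0]
# unpack to one mel frame per time step
decoder_mel_outputs = tf.reshape(
    decoder_outputs, [batch_size, -1, num_mels])             # [N, T_out, M]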
Example #13
            def __init__(self,
                         name,
                         input_reprs,
                         roll_direction=0,
                         activate=True,
                         is_translate=False,
                         word_in=None,
                         encoder_reprs=encoder.bi_reprs):
                self.name = name
                with tf.variable_scope(name + '/predictions'):
                    #decoder_state = tf.layers.dense(input_reprs, config.projection_size, name='encoder_to_decoder')
                    decoder_state = input_reprs

                    with tf.variable_scope('word_embeddings_vi'):
                        word_embedding_matrix = tf.get_variable(
                            'word_embedding_matrix_vi',
                            initializer=pretrained_embeddings_vi)
                        if is_translate:
                            word_embeddings = tf.nn.embedding_lookup(
                                word_embedding_matrix, word_in)
                        else:
                            word_embeddings = tf.nn.embedding_lookup(
                                word_embedding_matrix, words_tgt_in)
                        word_embeddings = tf.nn.dropout(
                            word_embeddings, inputs.keep_prob)
                        word_embeddings *= tf.get_variable('emb_scale',
                                                           initializer=1.0)

                    decoder_lstm = model_helpers.lstm_cell(
                        config.bidirectional_sizes[0], inputs.keep_prob,
                        config.projection_size)

                    decoder_output_layer = tf.layers.Dense(n_classes,
                                                           name='predict')

                    if not is_translate:
                        attention_mechanism = LuongAttention(
                            num_units=config.attention_units,
                            memory=encoder_reprs,
                            memory_sequence_length=size_sr,
                            scale=True)
                        attention_cell = AttentionWrapper(
                            decoder_lstm,
                            attention_mechanism,
                            attention_layer_size=config.attention_units)

                        batch_size = tf.shape(words_tgt_in)[0]
                        decoder_initial_state = attention_cell.zero_state(
                            dtype=tf.float32,
                            batch_size=batch_size * config.beam_width)
                        decoder_state = decoder_initial_state.clone(
                            cell_state=decoder_state)

                        helper = tf.contrib.seq2seq.TrainingHelper(
                            word_embeddings, size_tgt)

                        decoder = tf.contrib.seq2seq.BasicDecoder(
                            attention_cell, helper, decoder_state,
                            decoder_output_layer)

                        outputs, state, _ = tf.contrib.seq2seq.dynamic_decode(
                            decoder)
                        # swap_memory=True)

                        self.logits = outputs.rnn_output
                    else:
                        if config.decode_mode == 'greedy':
                            helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                                word_embedding_matrix,
                                [embeddings.START, embeddings.START],
                                embeddings.END)

                            decoder = tf.contrib.seq2seq.BasicDecoder(
                                decoder_lstm, helper, decoder_state,
                                decoder_output_layer)
                        elif config.decode_mode == 'beam':
                            encoder_reprs = tf.contrib.seq2seq.tile_batch(
                                encoder_reprs, multiplier=config.beam_width)
                            decoder_state = tf.contrib.seq2seq.tile_batch(
                                decoder_state, multiplier=config.beam_width)
                            size_src = tf.contrib.seq2seq.tile_batch(
                                size_sr, multiplier=config.beam_width)

                            attention_mechanism = LuongAttention(
                                num_units=config.attention_units,
                                memory=encoder_reprs,
                                memory_sequence_length=size_src,
                                scale=True)
                            attention_cell = AttentionWrapper(
                                decoder_lstm,
                                attention_mechanism,
                                attention_layer_size=config.attention_units)

                            batch_size = 2
                            decoder_initial_state = attention_cell.zero_state(
                                dtype=tf.float32,
                                batch_size=batch_size * config.beam_width)
                            decoder_state = decoder_initial_state.clone(
                                cell_state=decoder_state)

                            #decoder_state = tf.contrib.seq2seq.tile_batch(
                            #  decoder_state, multiplier=config.beam_width)

                            decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                                cell=attention_cell,
                                embedding=word_embedding_matrix,
                                start_tokens=[
                                    embeddings.START, embeddings.START
                                ],
                                end_token=embeddings.END,
                                initial_state=decoder_state,
                                beam_width=config.beam_width,
                                output_layer=decoder_output_layer)

                        outputs, state, _ = tf.contrib.seq2seq.dynamic_decode(
                            decoder,
                            maximum_iterations=config.max_translate_length)
                        #swap_memory=True)

                        if config.decode_mode == 'greedy':
                            self.sample_ids = outputs.sample_id
                        elif config.decode_mode == 'beam':
                            self.sample_ids = outputs.predicted_ids
                    '''
          outputs, state = tf.nn.dynamic_rnn(
            model_helpers.lstm_cell(config.bidirectional_sizes[0], inputs.keep_prob,
                                    config.projection_size),
            word_embeddings,
            initial_state=decoder_state,
            dtype=tf.float32,
            sequence_length=size_tgt,
            scope='predictlstm'
          )
          '''

                    self.state = state

                    #self.logits = tf.layers.dense(outputs, n_classes, name='predict')
                    #self.logits = tf.layers.dense(outputs.rnn_output, n_classes, name='predict')

                if is_translate:
                    return

                targets = words_tgt_out
                targets *= (1 - inputs.label_smoothing)
                targets += inputs.label_smoothing / n_classes
                self.loss = model_helpers.masked_ce_loss(
                    self.logits, targets, inputs.mask)
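
A note on the beam-search branch above: the encoder memory, its lengths, and the decoder state are tiled by beam_width before the AttentionWrapper is built, and the wrapper's zero_state is created for the enlarged batch. A minimal sketch of that tiling pattern, assuming the TF 1.x contrib APIs and stand-in sizes:

import tensorflow as tf
from tensorflow.contrib import rnn, seq2seq

beam_width, num_units = 4, 128

encoder_outputs = tf.placeholder(tf.float32, [None, None, num_units])
encoder_lengths = tf.placeholder(tf.int32, [None])
batch_size = tf.shape(encoder_outputs)[0]

# tile the memory and its lengths (the encoder state would be tiled the same way)
tiled_memory = seq2seq.tile_batch(encoder_outputs, multiplier=beam_width)
tiled_lengths = seq2seq.tile_batch(encoder_lengths, multiplier=beam_width)

attention = seq2seq.LuongAttention(num_units, tiled_memory,
                                   memory_sequence_length=tiled_lengths,
                                   scale=True)
cell = seq2seq.AttentionWrapper(rnn.LSTMCell(num_units), attention,
                                attention_layer_size=num_units)

# the wrapper's zero_state must be built for batch_size * beam_width;
# the example then clones it with the tiled encoder state as cell_state
initial_state = cell.zero_state(batch_size * beam_width, tf.float32)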
Example #14
    def buildModel(self):
        T_in = self.args.T_in
        T_out = self.args.T_out
        D_in = self.args.D_in
        D_out = self.args.D_out
        E = self.args.embedding_dim
        H = self.args.hidden_dim
        SOS = self.args.SOS
        EOS = self.args.EOS
        PAD = self.args.PAD
        beam_width = 3

        # Input
        with tf.name_scope('input'):
            x = tf.placeholder(shape=(None, T_in),
                               dtype=tf.int32,
                               name='encoder_inputs')
            # N, T_out
            y = tf.placeholder(shape=(None, T_out),
                               dtype=tf.int32,
                               name='decoder_inputs')
            # N
            x_len = tf.placeholder(shape=(None, ), dtype=tf.int32)
            # N
            y_len = tf.placeholder(shape=(None, ), dtype=tf.int32)
            # dynamic sample num
            batch_size = tf.shape(x)[0]

            # symbol mask
            sos = tf.ones(shape=(batch_size, 1), dtype=tf.int32) * SOS
            eos = tf.ones(shape=(batch_size, 1), dtype=tf.int32) * EOS
            pad = tf.ones(shape=(batch_size, 1), dtype=tf.int32) * PAD

            # input mask
            x_mask = tf.sequence_mask(x_len, T_in, dtype=tf.float32)
            y_with_sos_mask = tf.sequence_mask(y_len,
                                               T_out + 1,
                                               dtype=tf.float32)
            y_with_pad = tf.concat([y, pad], axis=1)
            eos_mask = tf.one_hot(y_len, depth=T_out + 1, dtype=tf.int32) * EOS

            # masked inputs
            y_with_eos = y_with_pad + eos_mask
            y_with_sos = tf.concat([sos, y], axis=1)

        ## Embedding
        with tf.name_scope('embedding'):
            if self.args.use_pretrained:
                embedding_pretrained = np.fromfile(self.args.pretrained_file,
                                                   dtype=np.float32).reshape(
                                                       (-1, E))
                embedding = tf.Variable(embedding_pretrained, trainable=False)
            else:
                embedding = tf.get_variable(name='embedding',
                                            shape=(D_in, E),
                                            dtype=tf.float32,
                                            initializer=xavier_initializer())
            e_x = tf.nn.embedding_lookup(embedding, x)
            e_y = tf.nn.embedding_lookup(embedding, y_with_sos)
            if self.args.mode == 'train':
                e_x = tf.nn.dropout(e_x, self.args.keep_prob)

        ## Encoder
        with tf.name_scope('encoder'):
            ## Multi-BiLSTM
            fw_cell = rnn.MultiRNNCell([
                rnn.BasicLSTMCell(num_units=H)
                for i in range(self.args.layer_size)
            ])
            bw_cell = rnn.MultiRNNCell([
                rnn.BasicLSTMCell(num_units=H)
                for i in range(self.args.layer_size)
            ])
            bi_encoder_output, bi_encoder_state = tf.nn.bidirectional_dynamic_rnn(
                fw_cell,
                bw_cell,
                e_x,
                sequence_length=x_len,
                dtype=tf.float32,
                time_major=False,
                scope=None)
            encoder_output = bi_encoder_output[0] + bi_encoder_output[1]
            encoder_final_state = bi_encoder_state[0]

        ## Decoder
        with tf.name_scope('decoder'):
            decoder_cell = rnn.MultiRNNCell([
                rnn.BasicLSTMCell(num_units=H)
                for i in range(self.args.layer_size)
            ])
            decoder_lengths = tf.ones(shape=[batch_size],
                                      dtype=tf.int32) * (T_out + 1)

            ## Training decoder
            with tf.variable_scope('attention'):
                attention_mechanism = LuongAttention(
                    num_units=H,
                    memory=encoder_output,
                    memory_sequence_length=x_len,
                    name='attention_fn')
            projection_layer = Dense(units=D_out,
                                     kernel_initializer=xavier_initializer())

            train_decoder_cell = AttentionWrapper(
                cell=decoder_cell,
                attention_mechanism=attention_mechanism,
                attention_layer_size=H)
            train_decoder_init_state = train_decoder_cell.zero_state(
                batch_size=batch_size,
                dtype=tf.float32).clone(cell_state=encoder_final_state)
            training_helper = TrainingHelper(e_y,
                                             decoder_lengths,
                                             time_major=False)
            train_decoder = BasicDecoder(
                cell=train_decoder_cell,
                helper=training_helper,
                initial_state=train_decoder_init_state,
                output_layer=projection_layer)
            train_decoder_outputs, _, _ = dynamic_decode(
                train_decoder,
                impute_finished=True,
                maximum_iterations=T_out + 1)
            # N, T_out+1, D_out
            train_decoder_outputs = ln(train_decoder_outputs.rnn_output)

            ## Beam_search decoder
            beam_memory = tile_batch(encoder_output, beam_width)
            beam_memory_state = tile_batch(encoder_final_state, beam_width)
            beam_memory_length = tile_batch(x_len, beam_width)

            with tf.variable_scope('attention', reuse=True):
                beam_attention_mechanism = LuongAttention(
                    num_units=H,
                    memory=beam_memory,
                    memory_sequence_length=beam_memory_length,
                    name='attention_fn')
            beam_decoder_cell = AttentionWrapper(
                cell=decoder_cell,
                attention_mechanism=beam_attention_mechanism,
                attention_layer_size=None)
            beam_decoder_init_state = beam_decoder_cell.zero_state(
                batch_size=batch_size * beam_width,
                dtype=tf.float32).clone(cell_state=beam_memory_state)
            start_tokens = tf.ones((batch_size), dtype=tf.int32) * SOS
            beam_decoder = BeamSearchDecoder(
                cell=beam_decoder_cell,
                embedding=embedding,
                start_tokens=start_tokens,
                end_token=EOS,
                initial_state=beam_decoder_init_state,
                beam_width=beam_width,
                output_layer=projection_layer)
            beam_decoder_outputs, _, _ = dynamic_decode(
                beam_decoder,
                scope=tf.get_variable_scope(),
                maximum_iterations=T_out + 1)
            beam_decoder_result_ids = beam_decoder_outputs.predicted_ids

        with tf.name_scope('loss'):
            # note: despite the name, these are probabilities (softmax already applied),
            # which matches sparse_categorical_crossentropy's default from_logits=False
            logits = tf.nn.softmax(train_decoder_outputs)
            cross_entropy = tf.keras.losses.sparse_categorical_crossentropy(
                y_with_eos, logits)
            loss_mask = tf.sequence_mask(y_len + 1,
                                         T_out + 1,
                                         dtype=tf.float32)
            loss = tf.reduce_sum(cross_entropy * loss_mask) / tf.cast(
                batch_size, dtype=tf.float32)
            prediction = tf.argmax(logits, 2)

        ## train_op
        with tf.name_scope('train'):
            global_step = tf.train.get_or_create_global_step()
            lr = noam_scheme(self.args.lr, global_step, self.args.warmup_steps)
            optimizer = tf.train.AdamOptimizer(lr)

            ## gradient clips
            trainable_params = tf.trainable_variables()
            gradients = tf.gradients(loss, trainable_params)
            clip_gradients, _ = tf.clip_by_global_norm(
                gradients, self.args.gradient_clip_num)
            train_op = optimizer.apply_gradients(zip(clip_gradients,
                                                     trainable_params),
                                                 global_step=global_step)

        # Summary
        with tf.name_scope('summary'):
            tf.summary.scalar('lr', lr)
            tf.summary.scalar('loss', loss)
            tf.summary.scalar('global_step', global_step)
            summaries = tf.summary.merge_all()
        return x, y, x_len, y_len, logits, loss, prediction, beam_decoder_result_ids, global_step, train_op, summaries
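
A note on the loss block above: per-token cross entropy is zeroed at padded positions with tf.sequence_mask (lengths are y_len + 1 because of the appended EOS) and then normalised by the batch size. A compact sketch of just that masking step, assuming the per-token losses are already computed:

import tensorflow as tf

T_out = 10  # stand-in target length
per_token_loss = tf.placeholder(tf.float32, [None, T_out + 1])  # [N, T_out+1]
target_lengths = tf.placeholder(tf.int32, [None])                # y_len

batch_size = tf.shape(per_token_loss)[0]

# 1.0 for real tokens (including the appended EOS), 0.0 for padding
loss_mask = tf.sequence_mask(target_lengths + 1, T_out + 1, dtype=tf.float32)

# sum over unmasked positions and normalise by the number of samples,
# mirroring the reduce_sum / batch_size normalisation in the example
loss = tf.reduce_sum(per_token_loss * loss_mask) / tf.cast(batch_size, tf.float32)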
Example #15
    def build_decoder_cell(self, encoder_outputs, encoder_state):
        """ 构建解码器cell """
        encoder_inputs_length = self.encoder_inputs_length
        batch_size = self.batch_size

        if self.bidirectional:
            encoder_state = encoder_state[-self.depth:]

        if self.time_major:
            encoder_outputs = tf.transpose(encoder_outputs, (1, 0, 2))

        if self.use_beamsearch_decode:
            encoder_outputs = seq2seq.tile_batch(encoder_outputs,
                                                 multiplier=self.beam_width)
            encoder_state = seq2seq.tile_batch(encoder_state,
                                               multiplier=self.beam_width)
            encoder_inputs_length = seq2seq.tile_batch(
                self.encoder_inputs_length, multiplier=self.beam_width)
            # with beam search, the effective batch size becomes batch_size * beam_width
            batch_size *= self.beam_width

        if self.attention_type.lower() == 'luong':
            self.attention_mechanism = LuongAttention(
                num_units=self.hidden_units,
                memory=encoder_outputs,
                memory_sequence_length=encoder_inputs_length)
        else:
            # BahdanauAttention is built with num_units and the encoder outputs; at call time it takes the query and returns the alignment weights
            self.attention_mechanism = BahdanauAttention(
                num_units=self.hidden_units,
                memory=encoder_outputs,
                memory_sequence_length=encoder_inputs_length)

        cell = MultiRNNCell([
            self.build_signle_cell(self.hidden_units,
                                   use_residual=self.use_residual)
            for _ in range(self.depth)
        ])
        # keep the attention history only in inference (non-training) mode and when beam search is off
        alignment_history = (self.mode != 'train'
                             and not self.use_beamsearch_decode)

        def cell_input_fn(inputs, attention):
            """ 根据attn_input_feeding属性来判断是否在attention计算前进行一次投影的计算"""
            if not self.use_residual:
                return array_ops.concat([inputs, attention], -1)

            attn_projection = layers.Dense(self.hidden_units,
                                           dtype=tf.float32,
                                           use_bias=False,
                                           name='attention_cell_input_fn')
            return attn_projection(array_ops.concat([inputs, attention], -1))

        attention_cell = AttentionWrapper(
            cell=cell,
            attention_mechanism=self.attention_mechanism,
            attention_layer_size=self.hidden_units,
            alignment_history=alignment_history,
            cell_input_fn=cell_input_fn,
            name='AttentionWrapper')
        # zero (empty) initial state
        decoder_initial_state = attention_cell.zero_state(
            batch_size, tf.float32)

        # pass the encoder state on: the decoder's initial state is taken directly from the encoder's final hidden state
        decoder_initial_state = decoder_initial_state.clone(
            cell_state=encoder_state)
        return attention_cell, decoder_initial_state
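
The function above returns an attention-wrapped cell plus an initial state whose cell_state was cloned from the encoder. A hedged sketch of how such a pair is typically consumed for training-time decoding; run_training_decoder and its arguments are stand-ins, only the TF 1.x contrib calls are real:

import tensorflow as tf
from tensorflow.contrib import seq2seq

def run_training_decoder(attention_cell, decoder_initial_state,
                         decoder_inputs_embedded, decoder_lengths, vocab_size):
    """Feed a (cell, initial_state) pair like the one returned above into a
    standard TrainingHelper / BasicDecoder / dynamic_decode pipeline."""
    helper = seq2seq.TrainingHelper(decoder_inputs_embedded, decoder_lengths)
    decoder = seq2seq.BasicDecoder(
        attention_cell, helper, decoder_initial_state,
        output_layer=tf.layers.Dense(vocab_size))
    outputs, final_state, _ = seq2seq.dynamic_decode(decoder)
    return outputs.rnn_output, final_state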
Example #16
    def build_graph(self, values, values_mask):
        with vs.variable_scope(self.name):
            lens = tf.reduce_sum(values_mask, axis=1)
            attention_mechanism = LuongAttention(self.attention_dim, values, lens)
            encoder = RNNEncoder(self.attention_dim, self.keep_prob, attention_mechanism)
            return encoder.build_graph(values, values_mask)
Example #17
    def build_decoder_cell(self):
        """构建解码器cell"""

        encoder_outputs = self.encoder_outputs
        encoder_last_state = self.encoder_last_state
        encoder_inputs_length = self.encoder_inputs_length

        if self.time_major:
            encoder_outputs = tf.transpose(encoder_outputs, (1, 0, 2))

        # when using BeamSearchDecoder, several tensors have to be tiled by beam_width
        # encoder_outputs, encoder_last_state, encoder_inputs_length
        # needs to be tiled so that:
        # [batch_size, .., ..] -> [batch_size x beam_width, .., ..]
        if self.use_beamsearch_decode:

            encoder_outputs = seq2seq.tile_batch(encoder_outputs,
                                                 multiplier=self.beam_width)
            encoder_last_state = nest.map_structure(
                lambda s: seq2seq.tile_batch(s, self.beam_width),
                self.encoder_last_state)
            encoder_inputs_length = seq2seq.tile_batch(
                self.encoder_inputs_length, multiplier=self.beam_width)

        # compute the decoder's hidden size; if the encoder is bidirectional,
        # the decoder's hidden size has to be doubled
        num_units = self.hidden_units
        if self.bidirectional:
            num_units *= 2

        # two different attention mechanisms below
        if self.attention_type.lower() == 'luong':
            # 'Luong' style attention: https://arxiv.org/abs/1508.04025
            self.attention_mechanism = LuongAttention(
                num_units=num_units,
                memory=encoder_outputs,
                memory_sequence_length=encoder_inputs_length)
        else:  # Default Bahdanau
            # 'Bahdanau' style attention: https://arxiv.org/abs/1409.0473
            self.attention_mechanism = BahdanauAttention(
                num_units=num_units,
                memory=encoder_outputs,
                memory_sequence_length=encoder_inputs_length)

        # Building decoder_cell
        self.decoder_cell_list = [
            self.build_single_cell(num_units, use_residual=self.use_residual)
            for i in range(self.depth)
        ]

        decoder_initial_state = encoder_last_state

        def attn_decoder_input_fn(inputs, attention):
            """根据attn_input_feeding属性来判断是否在attention计算前进行一次投影计算
            """
            if not self.attn_input_feeding:
                return inputs

            # Essential when use_residual=True
            hidden_units = self.hidden_units
            if self.bidirectional:
                hidden_units *= 2
            attn_projection = layers.Dense(
                hidden_units,
                dtype=tf.float32,
                # use_bias=False,
                name='attn_input_feeding')
            return attn_projection(array_ops.concat([inputs, attention], -1))

        # AttentionWrapper wraps RNNCell with the attention_mechanism
        # Note: We implement Attention mechanism only on the top decoder layer
        self.decoder_cell_list[-1] = AttentionWrapper(
            cell=self.decoder_cell_list[-1],
            attention_mechanism=self.attention_mechanism,
            # attention_layer_size=self.hidden_units,
            attention_layer_size=int(num_units / 2),
            cell_input_fn=attn_decoder_input_fn,
            initial_cell_state=encoder_last_state[-1],
            alignment_history=self.alignment_history,
            name='Attention_Wrapper')

        # To be compatible with AttentionWrapper, the encoder last state
        # of the top layer should be converted
        # into the AttentionWrapperState form
        # We can easily do this by calling AttentionWrapper.zero_state

        # Also if beamsearch decoding is used,
        # the batch_size argument in .zero_state
        # should be ${decoder_beam_width} times the original batch_size,
        # i.e. with beam search the batch dimension is beam_width * batch_size
        batch_size = self.batch_size if not self.use_beamsearch_decode \
                     else self.batch_size * self.beam_width
        initial_state = [state for state in encoder_last_state]

        initial_state[-1] = self.decoder_cell_list[-1].zero_state(
            batch_size=batch_size, dtype=tf.float32)
        decoder_initial_state = tuple(initial_state)

        return MultiRNNCell(self.decoder_cell_list), decoder_initial_state
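
A note on the state assembly above: only the top decoder layer is wrapped with attention, so the initial state keeps the raw encoder states for the lower layers and substitutes the wrapper's zero_state for the top one. A reduced sketch of that assembly with stand-in cells and sizes (placeholders stand in for the real encoder states):

import tensorflow as tf
from tensorflow.contrib import rnn, seq2seq

num_units, num_layers = 128, 3
memory = tf.placeholder(tf.float32, [None, None, num_units])
memory_lengths = tf.placeholder(tf.int32, [None])
encoder_last_state = tuple(
    rnn.LSTMStateTuple(tf.placeholder(tf.float32, [None, num_units]),
                       tf.placeholder(tf.float32, [None, num_units]))
    for _ in range(num_layers))
batch_size = tf.shape(memory)[0]

cells = [rnn.LSTMCell(num_units) for _ in range(num_layers)]
attention = seq2seq.BahdanauAttention(num_units, memory,
                                      memory_sequence_length=memory_lengths)
# wrap only the last layer; its encoder state is supplied as initial_cell_state
cells[-1] = seq2seq.AttentionWrapper(cells[-1], attention,
                                     attention_layer_size=num_units,
                                     initial_cell_state=encoder_last_state[-1])

# lower layers keep the encoder states; the top layer gets the wrapper's
# zero_state (which already carries the initial_cell_state passed above)
initial_state = list(encoder_last_state)
initial_state[-1] = cells[-1].zero_state(batch_size, tf.float32)
decoder_cell = rnn.MultiRNNCell(cells)
decoder_initial_state = tuple(initial_state)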
Example #18
    def build_decoder_cell(self, encoder_outputs, encoder_state):
        '''
        Build the decoder cell.
        :param encoder_outputs:
        :param encoder_state:
        :return:
        '''
        encoder_input_length = self.encoder_inputs_length
        batch_size = self.batch_size

        if self.bidirectional:
            encoder_state = encoder_state[-self.depth:]

        if self.time_major:
            encoder_outputs = tf.transpose(encoder_outputs,(1,0,2))

        if self.use_beamsearch_decode:
            '''tile_batch replicates the tensor self.beam_width times,
            so the batch effectively becomes self.beam_width times its original size
            '''
            encoder_outputs = seq2seq.tile_batch(
                encoder_outputs,multiplier=self.beam_width
            )
            encoder_state = seq2seq.tile_batch(
                encoder_state,multiplier=self.beam_width
            )


            encoder_input_length = seq2seq.tile_batch(
                self.encoder_inputs_length,multiplier=self.beam_width
            )

            # with beam search, the effective batch size becomes batch_size * beam_width
            batch_size *=self.beam_width


        if self.attention_type.lower() == 'luong':
            self.attention_mechanism = LuongAttention(
                num_units=self.hidden_units,
                memory=encoder_outputs,
                memory_sequence_length=encoder_input_length
            )
        else:
            self.attention_mechanism = BahdanauAttention(
                num_units=self.hidden_units,
                memory=encoder_outputs,
                memory_sequence_length=encoder_input_length
            )  # why is the memory encoder_outputs here rather than encoder_state?

        cell = MultiRNNCell(
            [
                self.build_single_cell(
                    self.hidden_units,
                    use_residual=self.use_residual
                )

                for _ in range(self.depth)
            ])

        alignment_history = (
            self.mode != 'train' and not self.use_beamsearch_decode
        )

        def cell_input_fn(inputs, attention):
            '''
            Decide, per attn_input_feeding, whether to project [inputs, attention]
            before the attention computation.
            :param inputs:
            :param attention:
            :return:
            '''

            if not self.use_residual:
                return array_ops.concat([inputs,attention],-1)

            attn_projection = layers.Dense(self.hidden_units,
                                           dtype=tf.float32,
                                           use_bias=False,
                                           name='attention_cell_input_fn')

            '''
            attn_projection(array_ops.concat([inputs, attention], -1)) is equivalent to
            layers.Dense(self.hidden_units,
                         dtype=tf.float32,
                         use_bias=False,
                         name='attention_cell_input_fn')(array_ops.concat([inputs, attention], -1));
            Dense defines __call__, so the layer object can be applied directly like a function.
            '''
            return attn_projection(array_ops.concat([inputs,attention],-1))


        cell = AttentionWrapper(
            cell=cell,
            attention_mechanism=self.attention_mechanism,
            attention_layer_size=self.hidden_units,
            alignment_history=alignment_history,  # history of attention alignments
            cell_input_fn=cell_input_fn,  # concatenates the attention with the cell input
            name='Attention_Wrapper'
        )  # AttentionWrapper wraps the cell with the attention mechanism

        decoder_initial_state = cell.zero_state(
            batch_size, tf.float32
        )  # initialise decoder_initial_state

        # pass the encoder state on
        decoder_initial_state = decoder_initial_state.clone(
            cell_state=encoder_state
        )

        return cell, decoder_initial_state
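
The translated comment inside cell_input_fn above points out that a layers.Dense object is callable, so building the projection and applying it can be a single expression. A tiny sketch of the same idea in isolation (the tensor shapes are arbitrary stand-ins):

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 64])     # cell inputs
attn = tf.placeholder(tf.float32, [None, 128])  # attention vector

# layers.Dense defines __call__, so constructing the layer and applying it
# to the concatenated [inputs, attention] tensor is one expression
projected = tf.layers.Dense(256, use_bias=False,
                            name='attention_cell_input_fn')(
                                tf.concat([x, attn], axis=-1))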
Example #19
    def build_decoder_cell(self, encoder_outputs, encoder_state):
        """
        Build the decoder cell.
        :param encoder_outputs: encoder outputs
        :param encoder_state: encoder final state
        :return: cell: attention-wrapped RNN decoder cell,
                 decoder_initial_state: initial decoder hidden state h0
        """
        encoder_inputs_length = self.encoder_inputs_length
        batch_size = self.batch_size

        if self.bidirectional:
            encoder_state = encoder_state[-self.depth:]

        if self.time_major:
            encoder_outputs = tf.transpose(encoder_outputs, (1, 0, 2))

        # BeamSearchDecoder
        if self.use_beamsearch_decode:
            encoder_outputs = seq2seq.tile_batch(encoder_outputs,
                                                 multiplier=self.beam_width)
            encoder_state = seq2seq.tile_batch(encoder_state,
                                               multiplier=self.beam_width)
            encoder_inputs_length = seq2seq.tile_batch(
                self.encoder_inputs_length, multiplier=self.beam_width)

        if self.attention_type.lower() == 'luong':
            # 'Luong' style attention: https://arxiv.org/abs/1508.04025
            self.attention_mechanism = LuongAttention(
                num_units=self.hidden_units,
                memory=encoder_outputs,
                memory_sequence_length=encoder_inputs_length)
        else:  # Default Bahdanau
            # 'Bahdanau' style attention: https://arxiv.org/abs/1409.0473
            self.attention_mechanism = BahdanauAttention(
                num_units=self.hidden_units,
                memory=encoder_outputs,
                memory_sequence_length=encoder_inputs_length)

        # Building decoder_cell
        if self.bidirectional:
            cell = MultiRNNCell([
                self.build_single_cell(self.hidden_units * 2,
                                       use_residual=self.use_residual)
                for _ in range(self.depth)
            ])
        else:
            cell = MultiRNNCell([
                self.build_single_cell(self.hidden_units,
                                       use_residual=self.use_residual)
                for _ in range(self.depth)
            ])

        # keep the attention history only in inference mode and when beam search is off
        alignment_history = (self.mode != 'train'
                             and not self.use_beamsearch_decode)

        def cell_input_fn(inputs, attention):
            """根据attn_input_feeding属性来判断是否在attention计算前进行一次投影计算
            """
            if not self.use_residual:
                return array_ops.concat([inputs, attention], -1)
            mul = 2 if self.bidirectional else 1
            attn_projection = layers.Dense(self.hidden_units * mul,
                                           dtype=tf.float32,
                                           use_bias=False,
                                           name='attention_cell_input_fn')

            return attn_projection(array_ops.concat([inputs, attention], -1))

        cell = AttentionWrapper(cell,
                                self.attention_mechanism,
                                attention_layer_size=self.hidden_units,
                                alignment_history=alignment_history,
                                cell_input_fn=cell_input_fn,
                                name='Attention_Wrapper')

        if self.use_beamsearch_decode:
            # with beam search, the batch dimension is beam_width times batch_size
            # batch_size *= self.beam_width
            decoder_initial_state = cell.zero_state(batch_size=batch_size *
                                                    self.beam_width,
                                                    dtype=tf.float32)
            decoder_initial_state = decoder_initial_state.clone(
                cell_state=encoder_state)
        else:
            # zero (empty) initial state
            decoder_initial_state = cell.zero_state(batch_size, tf.float32)
            # pass the encoder state on
            decoder_initial_state = decoder_initial_state.clone(
                cell_state=encoder_state)

        return cell, decoder_initial_state
    def build_decoder_cell(self, encoder_outputs, encoder_states):
        '''
        Build the decoder cell and return it together with its initial state.
        :param encoder_outputs:
        :param encoder_states:
        :return:
        '''
        encoder_input_length = self.encoder_inputs_length
        batch_size = self.batch_size

        if self.bidirectional:
            encoder_states = encoder_states[-self.depth:]

        if self.time_major:
            encoder_outputs = tf.transpose(encoder_outputs, (1, 0, 2))

        assert encoder_input_length is not None, 'encoder_inputs_length must not be None'
        assert isinstance(batch_size, int), 'batch_size must be an int'
        assert encoder_outputs is not None, 'encoder_outputs must not be None'
        assert encoder_states is not None, 'encoder_states must not be None'
        ######################### beam search case #####################################################
        if self.use_beamsearch_decode:
            '''tile_batch replicates the tensor self.beam_width times,
            so the batch effectively becomes self.beam_width times its original size
            '''
            encoder_outputs = seq2seq.tile_batch(
                encoder_outputs, multiplier=self.beam_width
            )
            encoder_states = seq2seq.tile_batch(
                encoder_states, multiplier=self.beam_width
            )
            encoder_input_length = seq2seq.tile_batch(
                self.encoder_inputs_length, multiplier=self.beam_width
            )
            # with beam search, the effective batch size becomes batch_size * beam_width
            batch_size *= self.beam_width
        ######################### end of beam search case ##############################################

        ######################### attention mechanism ##################################################
        if self.attention_type.lower() == 'luong':
            self.attention_mechanism = LuongAttention(
                num_units=self.hidden_units,
                memory=encoder_outputs,
                memory_sequence_length=encoder_input_length
            )
        else:
            self.attention_mechanism = BahdanauAttention(
                num_units=self.hidden_units,
                memory=encoder_outputs,
                memory_sequence_length=encoder_input_length
            )  # for a bidirectional LSTM, encoder_outputs are its hidden states h1
        ######################### end of attention mechanism ###########################################

        cell = MultiRNNCell(
            [
                self.build_single_cell(
                    self.hidden_units,
                    use_residual=self.use_residual
                )
                for _ in range(self.depth)
            ])
        # this cell is multi-layered.

        alignment_history = (
                self.mode != 'train' and not self.use_beamsearch_decode
        )

        # alignment_history is used only outside training and when beam search is off.

        def cell_input_fn(inputs, attention):
            '''
            Decide, per attn_input_feeding, whether to project [inputs, attention]
            before the attention computation; only used when attention is enabled.
            :param inputs:
            :param attention:
            :return:
            '''

            if not self.use_residual:
                print(inputs.get_shape(), 'inputs shape')
                print(attention.get_shape(), 'attention shape')
                print(array_ops.concat([inputs, attention], -1), 'inputs concatenated with attention')
                return array_ops.concat([inputs, attention], -1)

            attn_projection = layers.Dense(self.hidden_units,
                                           dtype=tf.float32,
                                           use_bias=False,
                                           name='attention_cell_input_fn')

            '''
            attn_projection(array_ops.concat([inputs, attention], -1)) is equivalent to
            layers.Dense(self.hidden_units,
                         dtype=tf.float32,
                         use_bias=False,
                         name='attention_cell_input_fn')(array_ops.concat([inputs, attention], -1)).
            Dense ultimately inherits from Layer, which defines both call and __call__;
            Dense overrides call, and __call__ (pre-process, call, post-process) invokes it,
            so the layer object still acts as a fully connected layer when applied directly.
            '''
            return attn_projection(array_ops.concat([inputs, attention], -1))

        cell = AttentionWrapper(
            cell=cell,
            attention_mechanism=self.attention_mechanism,
            attention_layer_size=self.hidden_units,
            alignment_history=alignment_history,  # history of attention alignments
            cell_input_fn=cell_input_fn,  # concatenates the attention with the cell input
            name='Attention_Wrapper'
        )  # AttentionWrapper wraps the cell with the attention mechanism

        decoder_initial_state = cell.zero_state(
            batch_size, tf.float32
        )  # initialise decoder_initial_state

        # pass the encoder state on
        decoder_initial_state = decoder_initial_state.clone(
            cell_state=encoder_states
        )

        return cell, decoder_initial_state
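
Both build_decoder_cell variants above return a (cell, decoder_initial_state) pair. As a closing sketch, one plausible inference path such a pair could be fed into, using GreedyEmbeddingHelper; greedy_decode and all of its arguments are hypothetical, only the TF 1.x contrib calls are real:

import tensorflow as tf
from tensorflow.contrib import seq2seq

def greedy_decode(cell, decoder_initial_state, embedding_matrix,
                  start_token, end_token, batch_size, vocab_size,
                  max_decode_length=50):
    """Greedy inference decoding for a cell built like the ones above."""
    helper = seq2seq.GreedyEmbeddingHelper(
        embedding_matrix,
        start_tokens=tf.fill([batch_size], start_token),
        end_token=end_token)
    decoder = seq2seq.BasicDecoder(cell, helper, decoder_initial_state,
                                   output_layer=tf.layers.Dense(vocab_size))
    outputs, _, _ = seq2seq.dynamic_decode(
        decoder, maximum_iterations=max_decode_length)
    return outputs.sample_id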