Python cbhg示例，modules.cbhg Python示例

示例#1

0

显示文件

文件： net_mid.py 项目： chinacharlie/s2s

    def fnet(self, mel, is_training=True, reuse=None):
        """Build the feature network: prenet, two CBHG stacks, a projection and CTC decoding.

        Args:
            mel: Input feature tensor. It is summed over axis 2 and the
                non-zero results are counted along axis 1 to obtain lengths,
                so it is presumably (N, T, n_mels) with zero-padded frames --
                TODO confirm against the caller.
            is_training: Forwarded to ``prenet`` and ``cbhg``; also used as the
                ``trainable`` flag of the final dense projection.
            reuse: Variable-reuse flag forwarded to every layer built here.

        Returns:
            Tuple ``(mid, logits, ppgs, preds, decoded)`` where ``mid`` is the
            first CBHG output, ``logits`` the dense projection of the second
            CBHG output, ``ppgs`` the softmax of ``logits / hp.t``, ``preds``
            the int32 argmax of ``logits`` and ``decoded`` the densified first
            path returned by the CTC beam-search decoder.
        """
        half_units = hp.hidden_units // 2

        prenet_out = prenet(mel,
                            num_units=[hp.hidden_units, half_units],
                            dropout_rate=hp.dropout_rate,
                            is_training=is_training,
                            reuse=reuse)  # (N, T, E/2)

        # First CBHG stack; its output is handed back to the caller as `mid`.
        out, _ = cbhg(prenet_out, hp.num_banks, half_units,
                      hp.num_highway_blocks, hp.norm_type, is_training,
                      scope="fnet_cbhg1",
                      reuse=reuse)
        mid = out

        # NOTE(review): the second stack also reads `prenet_out`, not `mid`, so
        # the two CBHG stacks run side by side -- confirm this is intended.
        out, _ = cbhg(prenet_out, hp.num_banks, half_units,
                      hp.num_highway_blocks, hp.norm_type, is_training,
                      scope="fnet_cbhg2",
                      reuse=reuse)

        # Final linear projection
        logits = tf.layers.dense(out, hp.len_chinese_ppgs, trainable=is_training, reuse=reuse)  # (N, T, V)
        ppgs = tf.nn.softmax(logits / hp.t, name='ppgs')  # (N, T, V)
        preds = tf.to_int32(tf.argmax(logits, axis=-1))  # (N, T)

        # The CTC decoder expects time-major input, hence the transpose.
        decoded = tf.transpose(logits, perm=[1, 0, 2])
        # A frame counts towards the sequence length when its features do not
        # sum to zero. `axis` replaces the deprecated `reduction_indices`.
        frame_sums = tf.reduce_sum(mel, axis=2)
        nonzero_frames = tf.cast(tf.not_equal(frame_sums, 0.), tf.int32)
        sequence_len = tf.reduce_sum(nonzero_frames, axis=1)
        decoded, _ = tf.nn.ctc_beam_search_decoder(decoded, sequence_len, merge_repeated=False)
        # Only the first decoded path is kept, converted from sparse to dense.
        decoded = tf.sparse_to_dense(decoded[0].indices, decoded[0].dense_shape, decoded[0].values)

        return mid, logits, ppgs, preds, decoded

示例#2

0

显示文件

文件： models.py 项目： huskychatbot/chatbot

    def network(self, ppgs, is_training):
        """Map ``ppgs`` to a mel prediction and then to a spectrogram prediction.

        Two stages are chained: prenet + CBHG ("cbhg_mel") + dense layer
        produce ``pred_mel``; a dense layer + CBHG ("cbhg_linear") + dense
        layer applied to ``pred_mel`` produce ``pred_spec``.

        Args:
            ppgs: Input tensor fed to the prenet.
            is_training: Forwarded to ``prenet`` and ``cbhg``.

        Returns:
            Tuple ``(pred_spec, pred_mel)``; the last dimensions match
            ``self.y_spec.shape[-1]`` and ``self.y_mel.shape[-1]``.
        """
        cfg = hp.Train2
        half_units = cfg.hidden_units // 2

        # Stage 1: prenet -> CBHG -> projection to the mel target width.
        hidden = prenet(ppgs,
                        num_units=[cfg.hidden_units, half_units],
                        dropout_rate=cfg.dropout_rate,
                        is_training=is_training)
        mel_features = cbhg(hidden, cfg.num_banks, half_units,
                            cfg.num_highway_blocks, cfg.norm_type,
                            is_training, scope="cbhg_mel")
        mel_width = self.y_mel.shape[-1]
        pred_mel = tf.layers.dense(mel_features, mel_width, name='pred_mel')

        # Stage 2: widen the mel prediction back to the CBHG input width,
        # run the second CBHG, then project to the spectrogram target width.
        spec_input = tf.layers.dense(pred_mel, half_units)
        spec_features = cbhg(spec_input, cfg.num_banks, half_units,
                             cfg.num_highway_blocks, cfg.norm_type,
                             is_training, scope="cbhg_linear")
        spec_width = self.y_spec.shape[-1]
        pred_spec = tf.layers.dense(spec_features, spec_width, name='pred_spec')

        return pred_spec, pred_mel

示例#3

0

显示文件

文件： net_mid.py 项目： chinacharlie/s2s

    def gnet(self, feature, is_training=True, reuse=None):
        """Generate mel and spectrogram predictions from ``feature``.

        Pipeline: dense -> prenet -> CBHG ("cbhg_gnet_mel") -> dense ``g_mel``,
        followed by dense -> CBHG ("cbhg_gnet_spec") -> dense ``g_spec``.

        Args:
            feature: Input tensor fed to the first dense layer.
            is_training: Forwarded to ``prenet`` and ``cbhg``.
            reuse: Variable-reuse flag forwarded to every layer built here.

        Returns:
            Tuple ``(g_spec, g_mel)``; the last dimensions match
            ``self.x_spec.shape[-1]`` and ``self.x_mel.shape[-1]``.
        """
        units = hp.hidden_units

        # Input projection followed by the prenet (both layers `units` wide).
        projected = tf.layers.dense(feature, units, reuse=reuse)
        hidden = prenet(projected,
                        num_units=[units, units],
                        dropout_rate=hp.dropout_rate,
                        is_training=is_training,
                        reuse=reuse)

        # Mel branch.
        mel_features, _ = cbhg(hidden, hp.num_banks, units,
                               hp.num_highway_blocks, hp.norm_type,
                               is_training,
                               scope="cbhg_gnet_mel",
                               reuse=reuse)
        mel_width = self.x_mel.shape[-1]
        g_mel = tf.layers.dense(mel_features, mel_width, name='g_mel', reuse=reuse)

        # Spectrogram branch, driven by the mel prediction.
        spec_input = tf.layers.dense(g_mel, units, reuse=reuse)
        spec_features, _ = cbhg(spec_input, hp.num_banks, units,
                                hp.num_highway_blocks, hp.norm_type,
                                is_training,
                                scope="cbhg_gnet_spec",
                                reuse=reuse)
        spec_width = self.x_spec.shape[-1]
        g_spec = tf.layers.dense(spec_features, spec_width, name='g_spec', reuse=reuse)

        return g_spec, g_mel

示例#4

0

显示文件

    def _net1(self):
        """Build net1 under the ``net1`` variable scope.

        Feeds ``self.x_mfcc`` through prenet and CBHG, then projects onto the
        vocabulary returned by ``load_vocab()``. Reads ``self.is_training``.

        Returns:
            Tuple ``(ppgs, preds, logits)``: ``logits`` is the dense
            projection with one unit per vocabulary entry, ``ppgs`` is the
            softmax of ``logits / hp.Train1.t`` and ``preds`` is the int32
            argmax of ``logits`` over the last axis.
        """
        with tf.variable_scope('net1'):
            # Load vocabulary; only its size is needed here.
            phn2idx, _ = load_vocab()

            # Pre-net
            prenet_out = prenet(self.x_mfcc,
                                num_units=[
                                    hp.Train1.hidden_units,
                                    hp.Train1.hidden_units // 2
                                ],
                                dropout_rate=hp.Train1.dropout_rate,
                                is_training=self.is_training)  # (N, T, E/2)

            # CBHG
            out = cbhg(prenet_out, hp.Train1.num_banks,
                       hp.Train1.hidden_units // 2,
                       hp.Train1.num_highway_blocks, hp.Train1.norm_type,
                       self.is_training)

            # Final linear projection
            logits = tf.layers.dense(out, len(phn2idx))  # (N, T, V)
            ppgs = tf.nn.softmax(logits / hp.Train1.t)  # (N, T, V)
            # tf.argmax(axis=...) replaces the deprecated
            # tf.arg_max(dimension=...) spelling.
            preds = tf.to_int32(tf.argmax(logits, axis=-1))  # (N, T)

        return ppgs, preds, logits

示例#5

0

显示文件

文件： encoder.py 项目： mujjingun/myrnn2

    def __init__(self, hyperparams, is_training, inputs, input_lengths):
        """Build the encoder: embedding lookup -> prenet -> CBHG.

        Args:
            hyperparams: Hyperparameter object; reads ``num_symbols``,
                ``embedding_size``, ``dropout_prob`` and the ``enc_*`` fields.
            is_training: Forwarded to ``modules.prenet`` and ``modules.cbhg``.
            inputs: Symbol ids, (batch, max_input_length).
            input_lengths: Per-example lengths, (batch).

        The CBHG output is stored on ``self.encoder_outputs``.
        """
        # Embedding table and lookup -> [N, T_in, embedding_size]
        table_shape = [hyperparams.num_symbols, hyperparams.embedding_size]
        table_init = tf.truncated_normal_initializer(stddev=0.5)
        embedding_table = tf.get_variable('embedding',
                                          table_shape,
                                          dtype=tf.float32,
                                          initializer=table_init)
        embedded = tf.nn.embedding_lookup(embedding_table, inputs)

        # Prenet -> [N, T_in, enc_prenet_sizes[-1]]
        prenet_outputs = modules.prenet(embedded,
                                        is_training,
                                        layer_sizes=hyperparams.enc_prenet_sizes,
                                        drop_prob=hyperparams.dropout_prob,
                                        scope='prenet')

        # Encoder CBHG; arguments are passed positionally, as modules.cbhg
        # is defined outside this block.
        self.encoder_outputs = modules.cbhg(
            prenet_outputs, input_lengths, is_training,
            hyperparams.enc_bank_size, hyperparams.enc_bank_channel_size,
            hyperparams.enc_maxpool_width, hyperparams.enc_highway_depth,
            hyperparams.enc_rnn_size, hyperparams.enc_proj_sizes,
            hyperparams.enc_proj_width,
            scope="encoder_cbhg")

示例#6

0

显示文件

文件： models.py 项目： rickyHong/deep-voice-conversion-enhance-repl

    def network(self, ppgs, is_training):
        """Predict per-bin mixture parameters for the spectrogram from ``ppgs``.

        The mel CBHG stage is disabled in this variant: the prenet output is
        fed straight into the "cbhg_linear" stage.

        Args:
            ppgs: Input tensor fed to the prenet.
            is_training: Forwarded to ``prenet`` and ``cbhg``.

        Returns:
            Tuple ``(mu, log_var, log_pi)``, each reshaped to
            ``(-1, n_timesteps, n_bins, hp.train2.n_mixtures)`` where
            ``n_timesteps`` and ``n_bins`` come from the static shape of
            ``self.y_spec``.
        """
        cfg = hp.train2
        half_units = cfg.hidden_units // 2

        # Pre-net; its output stands in for the mel prediction.
        pred_mel = prenet(ppgs,
                          num_units=[cfg.hidden_units, half_units],
                          dropout_rate=cfg.dropout_rate,
                          is_training=is_training)

        # CBHG2: linear-scale
        out = tf.layers.dense(pred_mel, half_units)
        out = cbhg(out, cfg.num_banks, half_units, cfg.num_highway_blocks,
                   cfg.norm_type, is_training, scope="cbhg_linear")

        _, n_timesteps, n_bins = self.y_spec.get_shape().as_list()
        n_units = n_bins * cfg.n_mixtures

        # One dense layer emits the three parameter groups back to back.
        bias_init = tf.random_uniform_initializer(minval=-3., maxval=3.)
        out = tf.layers.dense(out, n_units * 3, bias_initializer=bias_init)

        mixture_shape = (-1, n_timesteps, n_bins, cfg.n_mixtures)

        def as_mixtures(tensor):
            # (N, T, n_bins * n_mixtures) -> (N, T, n_bins, n_mixtures)
            return tf.reshape(tensor, shape=mixture_shape)

        # First third: means squashed into (0, 1).
        mu = as_mixtures(tf.nn.sigmoid(out[..., :n_units]))

        # Second third: log-variances, floored at -7.
        log_var = as_mixtures(tf.maximum(out[..., n_units:2 * n_units], -7.0))

        # Last third: mixture weights, normalized then log-softmaxed.
        log_pi = as_mixtures(out[..., 2 * n_units:3 * n_units])
        log_pi = normalize(log_pi,
                           type='ins',
                           is_training=get_current_tower_context().is_training,
                           scope='normalize_pi')
        log_pi = tf.nn.log_softmax(log_pi)

        return mu, log_var, log_pi

示例#7

0

显示文件

文件： models.py 项目： mynameismaxz/deep-voice-conversion

    def _net2(self):
        """Build net2 on top of net1 under the ``net2`` variable scope.

        The PPGs produced by ``self._net1()`` go through prenet, CBHG
        ("cbhg1") and a dense layer to give ``pred_mel``; a dense layer, CBHG
        ("cbhg2") and a final dense layer then give ``pred_spec``.
        Reads ``self.hparams.Train2`` and ``self.is_training``.

        Returns:
            Tuple ``(ppgs, preds_ppg, logits_ppg, pred_spec, pred_mel)``.
        """
        # PPGs from net1
        ppgs, preds_ppg, logits_ppg = self._net1()

        cfg = self.hparams.Train2
        half_units = cfg.hidden_units // 2

        with tf.variable_scope('net2'):
            # Pre-net
            hidden = prenet(ppgs,
                            num_units=[cfg.hidden_units, half_units],
                            dropout_rate=cfg.dropout_rate,
                            is_training=self.is_training)

            # CBHG1: mel-scale, projected to the width of self.y_mel.
            mel_features = cbhg(hidden, cfg.num_banks, half_units,
                                cfg.num_highway_blocks, cfg.norm_type,
                                self.is_training, scope="cbhg1")
            pred_mel = tf.layers.dense(mel_features, self.y_mel.shape[-1])

            # CBHG2: linear-scale, projected to the width of self.y_spec.
            spec_input = tf.layers.dense(pred_mel, half_units)
            spec_features = cbhg(spec_input, cfg.num_banks, half_units,
                                 cfg.num_highway_blocks, cfg.norm_type,
                                 self.is_training, scope="cbhg2")
            pred_spec = tf.layers.dense(spec_features, self.y_spec.shape[-1])

        return ppgs, preds_ppg, logits_ppg, pred_spec, pred_mel

示例#8

0

显示文件

文件： models.py 项目： QianQQ/Voice-Conversion

    def _net2(self):
        """Build net2 on top of net1 under the ``net2`` variable scope.

        The PPGs produced by ``self._net1()`` go through prenet, CBHG
        ("cbhg1") and a dense layer to give ``pred_mel``; a dense layer, CBHG
        ("cbhg2") and a final dense layer then give ``pred_spec``.
        Reads ``hp.Train2`` and ``self.is_training``.

        Returns:
            Tuple ``(ppgs, preds_ppg, logits_ppg, pred_spec, pred_mel)``.
        """
        # PPGs from net1
        ppgs, preds_ppg, logits_ppg = self._net1()

        cfg = hp.Train2
        half_units = cfg.hidden_units // 2

        with tf.variable_scope('net2'):
            # Pre-net
            hidden = prenet(
                ppgs,
                num_units=[cfg.hidden_units, half_units],
                dropout_rate=cfg.dropout_rate,
                is_training=self.is_training,
            )

            # CBHG1: mel-scale, projected to the width of self.y_mel.
            mel_features = cbhg(
                hidden,
                cfg.num_banks,
                half_units,
                cfg.num_highway_blocks,
                cfg.norm_type,
                self.is_training,
                scope="cbhg1",
            )
            pred_mel = tf.layers.dense(mel_features, self.y_mel.shape[-1])

            # CBHG2: linear-scale, projected to the width of self.y_spec.
            spec_input = tf.layers.dense(pred_mel, half_units)
            spec_features = cbhg(
                spec_input,
                cfg.num_banks,
                half_units,
                cfg.num_highway_blocks,
                cfg.norm_type,
                self.is_training,
                scope="cbhg2",
            )
            pred_spec = tf.layers.dense(spec_features, self.y_spec.shape[-1])

        return ppgs, preds_ppg, logits_ppg, pred_spec, pred_mel

示例#9

0

显示文件

    def network(self, x_mfcc, is_training):
        """Classify each frame of ``x_mfcc`` over the entries of ``phns``.

        Args:
            x_mfcc: Input tensor fed to the prenet.
            is_training: Forwarded to ``prenet`` and ``cbhg``.

        Returns:
            Tuple ``(ppgs, preds, logits)``: ``logits`` has ``len(phns)``
            units, ``ppgs`` is the softmax of ``logits / hp.train1.t`` (op
            name 'ppgs') and ``preds`` is the int32 argmax over the last axis.
        """
        cfg = hp.train1
        half_units = cfg.hidden_units // 2

        # Pre-net
        hidden = prenet(x_mfcc,
                        num_units=[cfg.hidden_units, half_units],
                        dropout_rate=cfg.dropout_rate,
                        is_training=is_training)

        # CBHG
        features = cbhg(hidden, cfg.num_banks, half_units,
                        cfg.num_highway_blocks, cfg.norm_type, is_training)

        # Final linear projection, temperature-scaled softmax and hard labels.
        n_classes = len(phns)
        logits = tf.layers.dense(features, n_classes)
        ppgs = tf.nn.softmax(logits / cfg.t, name='ppgs')
        preds = tf.to_int32(tf.argmax(logits, axis=-1))

        return ppgs, preds, logits

示例#10

0

显示文件

def test_cbhg():
    """Check that CBHG maps (batch, time, channels) to (batch, time, 2 * gru_units).

    Both the module and its input are moved to CUDA before the forward pass.
    """
    # Input dimensions; n_channels is the number of output features of pre-net.
    n_batch, n_steps, n_channels = 32, 15, 128
    ones_input = Variable(torch.ones(n_batch, n_steps, n_channels)).cuda()

    # Module configuration.
    n_banks = 16
    bank_channels = 128
    projections = (128, 128)
    n_highway_layers = 4
    n_highway_units = 128
    n_gru_units = 128
    module = CBHG(n_channels, n_banks, bank_channels, projections,
                  n_highway_layers, n_highway_units, n_gru_units).cuda()

    result = module(ones_input)
    expected_size = (n_batch, n_steps, 2 * n_gru_units)
    assert result.size() == expected_size

示例#11

0

显示文件

文件： models.py 项目： QianQQ/Voice-Conversion

    def _net1(self):
        """Build net1 under the ``net1`` variable scope.

        Feeds ``self.x_mfcc`` through prenet and CBHG, then projects onto the
        vocabulary returned by ``load_vocab()``. Reads ``self.is_training``.

        Returns:
            Tuple ``(ppgs, preds, logits)``: ``logits`` is the dense
            projection with one unit per vocabulary entry, ``ppgs`` is the
            softmax of ``logits / hp.Train1.t`` and ``preds`` is the int32
            argmax of ``logits`` over the last axis.
        """
        with tf.variable_scope('net1'):
            # Load vocabulary; only its size is needed here.
            phn2idx, _ = load_vocab()

            # Pre-net
            prenet_out = prenet(self.x_mfcc,
                                num_units=[hp.Train1.hidden_units, hp.Train1.hidden_units // 2],
                                dropout_rate=hp.Train1.dropout_rate,
                                is_training=self.is_training)  # (N, T, E/2)

            # CBHG
            out = cbhg(prenet_out, hp.Train1.num_banks, hp.Train1.hidden_units // 2,
                       hp.Train1.num_highway_blocks, hp.Train1.norm_type, self.is_training)

            # Final linear projection
            logits = tf.layers.dense(out, len(phn2idx))  # (N, T, V)
            ppgs = tf.nn.softmax(logits / hp.Train1.t)  # (N, T, V)
            # tf.argmax(axis=...) replaces the deprecated
            # tf.arg_max(dimension=...) spelling.
            preds = tf.to_int32(tf.argmax(logits, axis=-1))  # (N, T)

        return ppgs, preds, logits

示例#12

0

显示文件

    def __init__(self,
                 hyperparams,
                 is_training,
                 encoder_outputs,
                 mel_targets=None):
        """Build the attention decoder and the post-processing CBHG.

        Args:
            hyperparams: Hyperparameter object. Fields read here:
                ``attention_state_size``, ``dec_prenet_sizes``,
                ``dropout_prob``, ``attention_size``, ``dec_layer_num``,
                ``dec_rnn_size``, ``num_mels``, ``reduction_factor``,
                ``max_iters``, ``num_freq`` and the ``post_*`` CBHG settings.
            is_training: Python truth value. When true the decoder is run
                teacher-forced over ``mel_targets``; otherwise it is unrolled
                with ``dynamic_decode`` for up to ``max_iters`` steps.
            encoder_outputs: Memory attended over by the decoder.
            mel_targets: Target mel frames; only read when ``is_training``.

        Attributes set:
            alignments: Stacked attention alignment history, transposed with
                perm (1, 0, 2).
            final_state: Final decoder state (inference branch only).
            mel_outputs: Decoder outputs reshaped to
                ``[batch_size, -1, num_mels]``.
            linear_outputs: Dense projection of the post-CBHG output to
                ``num_freq`` units.
        """
        # mel_targets: (batch, max_sample_length, num_mels)
        # encoder_outputs: (batch, max_sentence_length, enc_rnn_size * 2)

        batch_size = tf.shape(encoder_outputs)[0]

        #GRU = tf.contrib.cudnn_rnn.CudnnCompatibleGRUCell
        GRU = tf.contrib.rnn.GRUCell

        # Attention RNN cell wrapped with the decoder prenet.
        dec_prenet = modules.DecoderPrenetWrapper(
            GRU(hyperparams.attention_state_size), is_training,
            hyperparams.dec_prenet_sizes, hyperparams.dropout_prob)

        attention_mechanism = tf.contrib.seq2seq.BahdanauMonotonicAttention(
            hyperparams.attention_size,
            encoder_outputs,
            normalize=True,
            score_bias_init=4.)

        # alignment_history=True keeps every step's alignment so it can be
        # stacked into self.alignments below.
        attention_cell = tf.contrib.seq2seq.AttentionWrapper(
            dec_prenet,
            attention_mechanism,
            alignment_history=True,
            output_attention=False)

        # Concatenate attention context vector and RNN cell output into a 512D vector.
        # [N, T_in, attention_size+attention_state_size]
        concat_cell = modules.ConcatOutputAndAttentionWrapper(attention_cell)

        # Stack the attention cell with dec_layer_num GRU layers; every layer
        # after the first one gets a residual connection.
        cells = [concat_cell]
        for layer_index in range(hyperparams.dec_layer_num):
            cell = GRU(hyperparams.dec_rnn_size)
            if layer_index == 0:
                cells.append(cell)
            else:
                cells.append(tf.contrib.rnn.ResidualWrapper(cell))

        # [N, T_in, 256]
        decoder_cell = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)

        # GRU layers + linear projection
        # Weights (shared by the training and inference branches below)
        proj_input_size = hyperparams.dec_rnn_size
        proj_output_size = hyperparams.num_mels * hyperparams.reduction_factor
        decoder_proj_weights = tf.get_variable(
            'decoder_proj_weights',
            shape=[proj_input_size, proj_output_size],
            initializer=tf.contrib.layers.xavier_initializer())

        if is_training:
            # Training Model for speed
            r = hyperparams.reduction_factor
            # Teacher forcing: take every r-th target frame starting at index
            # r - 1 and prepend one all-zero frame along the time axis.
            # NOTE(review): with reduction_factor == 1 the slice becomes
            # [0:0:1], which is empty -- confirm r > 1 is always the case.
            pre_padded_mel = tf.pad(mel_targets[:, r - 1:-r + 1:r],
                                    [[0, 0], [1, 0], [0, 0]])
            gru_outputs, states = tf.nn.dynamic_rnn(decoder_cell,
                                                    pre_padded_mel,
                                                    dtype=tf.float32,
                                                    swap_memory=True,
                                                    scope='rnn')

            # Flatten batch and time so that one matmul projects every step.
            decoder_outputs = tf.matmul(
                tf.reshape(gru_outputs, (-1, hyperparams.dec_rnn_size)),
                decoder_proj_weights)

            # Grab alignments (N, T_out, T_in)
            self.alignments = tf.transpose(states[0].alignment_history.stack(),
                                           (1, 0, 2))

        else:
            # Apply the same projection weights inside the cell so each step's
            # projected output is available to the decoding helper.
            proj_decoder_cell = modules.OutputProjectionWrapper(
                decoder_cell, decoder_proj_weights)

            # Synthesis model for inference
            helper = modules.TacoTestHelper(batch_size, hyperparams.num_mels,
                                            hyperparams.reduction_factor)

            decoder_init_state = proj_decoder_cell.zero_state(
                batch_size=batch_size, dtype=tf.float32)

            (decoder_outputs, _), self.final_state, _ = \
                    tf.contrib.seq2seq.dynamic_decode(
                            tf.contrib.seq2seq.BasicDecoder(proj_decoder_cell, helper, decoder_init_state),
                            maximum_iterations=hyperparams.max_iters,
                            swap_memory=True,
                            scope='rnn')

            # Grab alignments from the final decoder state:
            self.alignments = tf.transpose(
                self.final_state[0].alignment_history.stack(), (1, 0, 2))

        # Unfold the reduction_factor frames emitted per step back onto the
        # time axis.
        # [N, T_out, M]
        self.mel_outputs = tf.reshape(decoder_outputs,
                                      [batch_size, -1, hyperparams.num_mels])

        # Add post-processing CBHG:
        # [N, T_out, 256]
        # The second positional argument is None here; the encoder-side call
        # in this codebase presumably passes sequence lengths in that slot --
        # TODO confirm against modules.cbhg.
        post_outputs = modules.cbhg(self.mel_outputs,
                                    None,
                                    is_training,
                                    hyperparams.post_bank_size,
                                    hyperparams.post_bank_channel_size,
                                    hyperparams.post_maxpool_width,
                                    hyperparams.post_highway_depth,
                                    hyperparams.post_rnn_size,
                                    hyperparams.post_proj_sizes,
                                    hyperparams.post_proj_width,
                                    scope='post_cbhg')

        self.linear_outputs = tf.layers.dense(
            post_outputs, hyperparams.num_freq)  # [N, T_out, F]

示例#13

0

显示文件

    def __init__(self,
                 inp,
                 inp_mask,
                 decode_time_steps,
                 hyper_params=None,
                 name='Tacotron'):
        """
        Build the computational graph.
        :param inp:
        :param inp_mask:
        :param decode_time_steps:
        :param hyper_params:
        :param name:
        """
        super(Tacotron, self).__init__(name)
        self.hyper_params = HyperParams(
        ) if hyper_params is None else hyper_params

        with tf.variable_scope(name):
            self.global_step = tf.Variable(0,
                                           name='global_step',
                                           trainable=False)

            batch_size = tf.shape(inp)[0]
            input_time_steps = tf.shape(inp)[1]
            reduc = self.hyper_params.reduction_rate
            output_time_steps = decode_time_steps * reduc

            ### Encoder [begin]
            with tf.variable_scope('character_embedding'):
                embed_inp = EmbeddingLayer(self.hyper_params.embed_class,
                                           self.hyper_params.embed_dim)(inp)
            with tf.variable_scope('encoder_pre_net'):
                pre_ed_inp = tf.layers.dropout(tf.layers.dense(
                    embed_inp, 256, tf.nn.relu),
                                               training=False)
                pre_ed_inp = tf.layers.dropout(tf.layers.dense(
                    pre_ed_inp, 128, tf.nn.relu),
                                               training=False)
            encoder_output = modules.cbhg(pre_ed_inp,
                                          training=False,
                                          k=16,
                                          bank_filters=128,
                                          projection_filters=(128, 128),
                                          highway_layers=4,
                                          highway_units=128,
                                          bi_gru_units=128,
                                          sequence_length=inp_mask,
                                          name='encoder_cbhg',
                                          reuse=False)
            ### Encoder [end]

            ### Attention Module
            with tf.variable_scope('attention'):
                att_module = AttentionModule(256,
                                             encoder_output,
                                             sequence_length=inp_mask,
                                             time_major=False)

            ### Decoder [begin]
            att_cell = GRUCell(256)
            dec_cell = MultiRNNCell(
                [ResidualWrapper(GRUCell(256)) for _ in range(2)])
            # prepare output alpha TensorArray
            with tf.variable_scope('prepare_decode'):
                # prepare output alpha TensorArray
                reduced_time_steps = tf.div(output_time_steps, reduc)
                init_att_cell_state = att_cell.zero_state(
                    batch_size, tf.float32)
                init_dec_cell_state = dec_cell.zero_state(
                    batch_size, tf.float32)
                init_state_tup = tuple(
                    [init_att_cell_state, init_dec_cell_state])
                init_output_ta = tf.TensorArray(size=reduced_time_steps,
                                                dtype=tf.float32)
                init_alpha_ta = tf.TensorArray(size=reduced_time_steps,
                                               dtype=tf.float32)
                go_array = tf.zeros(
                    [batch_size, self.hyper_params.seq2seq_dim],
                    dtype=tf.float32)
                init_context = tf.zeros([batch_size, 256], dtype=tf.float32)
                init_time = tf.constant(0, dtype=tf.int32)
            cond = lambda x, *_: tf.less(x, reduced_time_steps)

            def body(this_time, old_output_ta, old_alpha_ta, old_state_tup,
                     last_context, last_output):
                with tf.variable_scope('decoder_pre_net'):
                    dec_pre_ed_inp = last_output
                    dec_pre_ed_inp = tf.layers.dropout(tf.layers.dense(
                        dec_pre_ed_inp, 256, tf.nn.relu),
                                                       training=False)
                    dec_pre_ed_inp = tf.layers.dropout(tf.layers.dense(
                        dec_pre_ed_inp, 128, tf.nn.relu),
                                                       training=False)
                with tf.variable_scope('attention_rnn'):
                    att_cell_inp = tf.concat([last_context, dec_pre_ed_inp],
                                             axis=-1)
                    att_cell_out, att_cell_state = att_cell(
                        att_cell_inp, old_state_tup[0])
                with tf.variable_scope('attention'):
                    query = att_cell_state[0]
                    context, alpha = att_module(query)
                    new_alpha_ta = old_alpha_ta.write(this_time, alpha)
                with tf.variable_scope('decoder_rnn'):
                    dec_input = tf.layers.dense(
                        tf.concat([att_cell_out, context], axis=-1), 256)
                    dec_cell_out, dec_cell_state = dec_cell(
                        dec_input, old_state_tup[1])
                    dense_out = tf.layers.dense(
                        dec_cell_out, self.hyper_params.seq2seq_dim * reduc)
                    new_output_ta = old_output_ta.write(this_time, dense_out)
                    new_output = dense_out[:, -self.hyper_params.seq2seq_dim:]
                new_state_tup = tuple([att_cell_state, dec_cell_state])
                return tf.add(
                    this_time, 1
                ), new_output_ta, new_alpha_ta, new_state_tup, context, new_output

            # run loop
            _, seq2seq_output_ta, alpha_ta, *_ = tf.while_loop(
                cond, body, [
                    init_time, init_output_ta, init_alpha_ta, init_state_tup,
                    init_context, go_array
                ])
            with tf.variable_scope('reshape_decode'):
                seq2seq_output = tf.reshape(
                    seq2seq_output_ta.stack(),
                    shape=(reduced_time_steps, batch_size,
                           self.hyper_params.seq2seq_dim * reduc))
                seq2seq_output = tf.reshape(
                    tf.transpose(seq2seq_output, perm=(1, 0, 2)),
                    shape=(batch_size, output_time_steps,
                           self.hyper_params.seq2seq_dim))
                self.seq2seq_output = seq2seq_output

                alpha_output = tf.reshape(alpha_ta.stack(),
                                          shape=(reduced_time_steps,
                                                 batch_size, input_time_steps))
                alpha_output = tf.expand_dims(
                    tf.transpose(alpha_output, perm=(1, 0, 2)), -1)
                self.alpha_output = alpha_output
            ### Decoder [end]

            ### PostNet [begin]
            post_output = modules.cbhg(
                seq2seq_output,
                training=False,
                k=8,
                bank_filters=128,
                projection_filters=(256, self.hyper_params.seq2seq_dim),
                highway_layers=4,
                highway_units=128,
                bi_gru_units=128,
                sequence_length=None,
                name='decoder_cbhg',
                reuse=False)
            post_output = tf.layers.dense(post_output,
                                          self.hyper_params.post_dim,
                                          name='post_linear_transform')
            self.post_output = post_output