def initialize(self, batch):
    """Build the inference graph (encoder -> attention decoder) for `batch`.

    Args:
        batch: Batch object supplying inputs, embeddings, input lengths and,
            during training, mel/linear spectrogram targets.

    Side effects:
        Sets self.inputs, self.input_lengths, self.mel_targets,
        self.linear_targets, self.mel_outputs, self.linear_outputs,
        self.alignments and self.global_step.
    """
    # Fix: the original bound `as scope` but never used it — dropped.
    with tf.variable_scope('inference'):
        # Training mode is inferred from the presence of linear targets.
        # NOTE(review): reads a private attribute of Batch; confirm whether a
        # public getter (cf. batch.get_all()) should be used instead.
        linear_targets = batch._lin_targets
        is_training = linear_targets is not None
        batch_size = batch.get_size()

        # Encoder
        encoder = Encoder(is_training=is_training)
        encoder_outputs = encoder.encode(batch.get_embedds(),
                                         batch.get_input_lengths())

        # Decoder: teacher-forced helper during training, free-running
        # helper otherwise.
        if is_training:
            helper = TrainingHelper(batch.get_inputs(),
                                    batch.get_mel_targets(),
                                    self._hparams.num_mels,
                                    self._hparams.outputs_per_step)
        else:
            helper = TestingHelper(batch_size,
                                   self._hparams.num_mels,
                                   self._hparams.outputs_per_step)
        decoder = Decoder(helper, is_training=is_training)
        mel_outputs, lin_outputs, final_decoder_state = decoder.decode(
            encoder_outputs, batch_size)

        # Stack the decoder's per-step attention weights and transpose so the
        # batch dimension comes first (presumably (batch, enc_steps,
        # dec_steps) — TODO confirm against the attention wrapper).
        alignments = tf.transpose(
            final_decoder_state[0].alignment_history.stack(), [1, 2, 0])

        self.inputs, self.input_lengths, self.mel_targets, self.linear_targets = batch.get_all()
        self.mel_outputs = mel_outputs
        self.linear_outputs = lin_outputs
        self.alignments = alignments
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
class Seq2Seq(nn.Module):
    """Attentional encoder-decoder over a single shared embedding table.

    The encoder and decoder share `self.embedding`; the decoder also uses it
    to embed target tokens.
    """

    def __init__(self, vocab_size, embed_size, hidden_size, rnn_type='LSTM',
                 num_layers=1, bidirectional=False, attention_type='Bilinear',
                 dropout=0):
        super(Seq2Seq, self).__init__()
        self.embedding = nn.Embedding(num_embeddings=vocab_size,
                                      embedding_dim=embed_size)
        self.encoder = Encoder(embed_size=embed_size,
                               hidden_size=hidden_size,
                               rnn_type=rnn_type,
                               num_layers=num_layers,
                               bidirectional=bidirectional,
                               dropout=dropout)
        self.decoder = Decoder(embedding=self.embedding,
                               hidden_size=hidden_size,
                               rnn_type=rnn_type,
                               num_layers=num_layers,
                               attention_type=attention_type,
                               dropout=dropout)

    def load_pretrained_embeddings(self, path):
        """Load word vectors from a .npy file at `path` and freeze them."""
        pretrained = torch.from_numpy(np.load(path))
        self.embedding.weight.data.copy_(pretrained)
        # Keep the pretrained vectors fixed during training.
        self.embedding.weight.requires_grad = False

    def forward(self, src, trg):
        """Teacher-forced decoding of `trg` conditioned on `src`.

        :param src: LongTensor (batch_size, src_time_step)
        :param trg: LongTensor (batch_size, trg_time_step)
        """
        memory, mask, lens, states = self.encode(src)
        first_output = self.decoder.get_init_output(memory, lens, states)
        return self.decoder(memory, mask, states, first_output, trg)

    def encode(self, src):
        """Embed and encode `src`, deriving mask and lengths from padding.

        :param src: LongTensor (batch_size, time_step)
        :return: (memory, mask, lengths, final_states)
        """
        src = sentence_clip(src)
        mask = (src != PAD_INDEX)
        # Number of non-pad tokens per sequence.
        lengths = mask.long().sum(dim=1, keepdim=False)
        embedded = self.embedding(src)
        memory, final_states = self.encoder(embedded, lengths)
        return memory, mask, lengths, final_states

    def decode(self, src, max_len):
        """Greedy/free-running decoding up to `max_len` steps.

        :param src: LongTensor (batch_size, src_time_step)
        :param max_len: int
        """
        memory, mask, lens, states = self.encode(src)
        first_output = self.decoder.get_init_output(memory, lens, states)
        return self.decoder.decode(memory, mask, states, first_output,
                                   max_len)
def beam_decode(self, src, max_len, beam_size):
    """Beam-search decoding conditioned on `src`.

    :param src: LongTensor (batch_size, src_time_step)
    :param max_len: int, maximum number of decoding steps
    :param beam_size: int, beam width
    :return: decoder beam-search outputs
    """
    memory, mask, lens, states = self.encode(src)
    first_output = self.decoder.get_init_output(memory, lens, states)
    return self.decoder.beam_decode(memory, mask, states, first_output,
                                    max_len, beam_size)