def forward(self, decoder_input, z, drop_prob, initial_state=None):
    """
    Decode a batch of sequences with the rnn, feeding z alongside every token.

    :param decoder_input: tensor with shape of [batch_size, seq_len, embed_size]
    :param z: sequence context with shape of [batch_size, latent_variable_size]
    :param drop_prob: dropout probability applied to decoder_input
    :param initial_state: initial state handed to the decoder rnn
    :return: unnormalized logits with shape of
             [batch_size, seq_len, word_vocab_size]
             and the final state returned by the decoder rnn
    """
    assert parameters_allocation_check(self), \
        'Invalid CUDA options. Parameters should be allocated in the same memory'

    batch_size, seq_len, _ = decoder_input.size()
    latent_size = self.params.latent_variable_size

    # dropout touches the token embeddings only; z is appended afterwards
    dropped_input = F.dropout(decoder_input, drop_prob)

    # lay seq_len copies of z side by side, then fold them into a time axis
    tiled_z = t.cat([z for _ in range(seq_len)], 1)
    tiled_z = tiled_z.view(batch_size, seq_len, latent_size)

    rnn_input = t.cat([dropped_input, tiled_z], 2)
    rnn_out, final_state = self.rnn(rnn_input, initial_state)

    # the projection runs on a flat [batch_size * seq_len, rnn_size] matrix
    flat_out = rnn_out.contiguous().view(-1, self.params.decoder_rnn_size)
    logits = self.fc(flat_out)
    logits = logits.view(batch_size, seq_len, self.params.word_vocab_size)

    return logits, final_state
def forward(self, input):
    """
    Run the encoder rnn one time step at a time and return a latent sample
    drawn from the state reached after the last processed step.

    :param input: [batch_size, seq_len, embed_size] tensor
    :return: value of self.reparameterize(mu, logvar) for the last processed
             time step
    :raises ValueError: if seq_len < 2, because no step would be processed
    """
    [batch_size, seq_len, embed_size] = input.size()

    assert parameters_allocation_check(self), \
        'Invalid CUDA options. Parameters should be allocated in the same memory'

    if seq_len < 2:
        raise ValueError(
            'encoder needs at least 2 time steps, got seq_len = {}'.format(seq_len))

    # Make the tensor time-major. transpose() really swaps the axes; the
    # previous view(seq_len, batch_size, embed_size) only reinterpreted the
    # memory layout, so every "time step" mixed tokens of different sentences.
    steps = input.transpose(0, 1)

    # Unfold the rnn from a zero (None) initial state.
    # NOTE(review): the loop stops at seq_len - 1, so the last token is never
    # fed to the rnn - kept as in the original, confirm this is intended.
    hx = None
    for i in range(seq_len - 1):
        # steps[i] is [batch_size, embed_size]; a length-1 axis is inserted at dim 1
        _, hx = self.rnn(steps[i].unsqueeze(1), hx)

        h = self.ziphidden(*hx)
        mu = self.linear_mu(h)
        logvar = self.linear_var(h)

        # evaluated at every step (as before); only the last value is returned
        h = self.reparameterize(mu, logvar)

    return h
def only_decoder_beam(self, decoder_input, z, drop_prob, initial_state=None):
    """
    Run the decoder rnn on a batch of beam hypotheses and return its raw
    outputs (no projection to the vocabulary is applied here).

    :param decoder_input: rank-3 tensor whose first axis is the beam batch
    :param z: latent context; one copy is stacked per beam hypothesis
    :param drop_prob: dropout probability applied to decoder_input
    :param initial_state: initial state handed to the decoder rnn
    :return: rnn outputs and the final rnn state
    """
    assert parameters_allocation_check(self)

    beam_batch_size, _, _ = decoder_input.size()

    dropped_input = F.dropout(decoder_input, drop_prob)

    # add a leading axis to z and repeat it once per hypothesis
    expanded_z = z.unsqueeze(0)
    z_per_beam = t.cat((expanded_z,) * beam_batch_size, 0)

    rnn_input = t.cat([dropped_input, z_per_beam], 2)
    outputs, state = self.rnn(rnn_input, initial_state)

    return outputs, state
def forward(self, input):
    """
    Encode a batch of sentences with the highway layer and the rnn.

    :param input: [batch_size, seq_len, embed_size] tensor
    :return: [batch_size, 2 * encoder_rnn_size] tensor built from the two
             direction slices of the last layer's final state
    """
    batch_size, seq_len, embed_size = input.size()

    # the highway layer is applied to a flat [batch_size * seq_len, embed_size] matrix
    flat_tokens = self.hw1(input.view(-1, embed_size))
    rnn_input = flat_tokens.view(batch_size, seq_len, embed_size)

    assert parameters_allocation_check(self), \
        'Invalid CUDA options. Parameters should be allocated in the same memory'

    # No initial state is passed to the rnn. The SECOND element of the
    # returned state pair is used (the cell state if self.rnn is an LSTM).
    _, (_, state) = self.rnn(rnn_input)

    by_layer = state.view(self.params.encoder_num_layers,
                          2,
                          batch_size,
                          self.params.encoder_rnn_size)
    top_layer = by_layer[-1]

    # concatenate the two direction slices along the feature axis
    return t.cat([top_layer[0], top_layer[1]], 1)
def forward(self, input):
    """
    Encode a batch of sentences: highway layer, rnn, then a second highway
    layer over the concatenated final state of the last rnn layer.

    :param input: [batch_size, seq_len, embed_size] tensor
    :return: output of self.hw2 applied to a [batch_size, 2 * encoder_rnn_size]
             tensor of sentence contexts
    """
    batch_size, seq_len, embed_size = input.size()

    # the first highway layer is applied to a flat [batch_size * seq_len, embed_size] matrix
    flat_tokens = self.hw1(input.view(-1, embed_size))
    rnn_input = flat_tokens.view(batch_size, seq_len, embed_size)

    assert parameters_allocation_check(self), \
        'Invalid CUDA options. Parameters should be allocated in the same memory'

    # No initial state is passed to the rnn. The FIRST element of the
    # returned state pair is used (the hidden state if self.rnn is an LSTM).
    _, (state, _) = self.rnn(rnn_input)

    by_layer = state.view(self.params.encoder_num_layers,
                          2,
                          batch_size,
                          self.params.encoder_rnn_size)
    top_layer = by_layer[-1]

    # concatenate the two direction slices along the feature axis
    context = t.cat([top_layer[0], top_layer[1]], 1)

    return self.hw2(context)
def forward(self, drop_prob,
            encoder_word_input=None, encoder_character_input=None,
            decoder_word_input=None,
            z=None):
    """
    Run the autoencoder: encode and sample z when z is not given, then decode.

    :param drop_prob: dropout probability forwarded to the decoder
    :param encoder_word_input: tensor with shape of [batch_size, seq_len] of Long type
    :param encoder_character_input: tensor with shape of
           [batch_size, seq_len, max_word_len] of Long type
    :param decoder_word_input: tensor with shape of [batch_size, max_seq_len + 1] of Long type
    :param z: latent context; when given the encoder is skipped
    :return: whatever self.decoder returns, and the kld loss estimation
             (None when z was supplied by the caller)
    """
    assert parameters_allocation_check(self), \
        'Invalid CUDA options. Parameters should be allocated in the same memory'
    use_cuda = self.embedding.word_embed.weight.is_cuda

    # either every encoder/decoder input is present, or z plus the decoder input
    has_training_inputs = z is None and fold(
        lambda acc, parameter: acc and parameter is not None,
        [encoder_word_input, encoder_character_input, decoder_word_input],
        True)
    has_sampling_inputs = z is not None and decoder_word_input is not None
    assert has_training_inputs or has_sampling_inputs, \
        "Invalid input. If z is None then encoder and decoder inputs should be passed as arguments"

    if z is not None:
        kld = None
    else:
        # get context from encoder and sample z ~ N(mu, std)
        batch_size, _ = encoder_word_input.size()

        encoder_input = self.embedding(encoder_word_input, encoder_character_input)
        context = self.encoder(encoder_input)

        mu = self.context_to_mu(context)
        logvar = self.context_to_logvar(context)
        std = t.exp(0.5 * logvar)

        noise = Variable(t.randn([batch_size, self.params.latent_variable_size]))
        if use_cuda:
            noise = noise.cuda()
        z = noise * std + mu

        # summed over the latent axis, averaged over the batch
        kld_per_sample = -0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1, 1)
        kld = kld_per_sample.mean().squeeze()

    decoder_input = self.embedding.word_embed(decoder_word_input)
    out = self.decoder(decoder_input, z, drop_prob)

    return out, kld
def forward(self, decoder_input, z, drop_prob, initial_state=None):
    """
    Decode a batch of sequences with the rnn, feeding z alongside every token.

    :param decoder_input: tensor with shape of [batch_size, max_seq_len + 1, word_embed_size]
    :param z: latent variable with shape of [batch_size, latent_variable_size]
    :param drop_prob: dropout probability applied to the concatenated
           [token embedding, z] input while the module is in training mode
    :param initial_state: initial state of generator rnn
    :return: output of self.fc reshaped to [batch_size, max_seq_len + 1, D],
             where D is word_embed_size if params.decoder_type == 'gru_emb'
             and word_vocab_size otherwise,
             and the final state returned by the rnn
    """
    assert parameters_allocation_check(self), \
        'Invalid CUDA options. Parameters should be allocated in the same memory'

    [batch_size, seq_len, _] = decoder_input.size()

    # one copy of z per time step, appended to every input token
    z = z.unsqueeze(1).repeat(1, seq_len, 1)
    decoder_input = t.cat([decoder_input, z], 2)

    # The flag used to be `training=z is None`. z has just been rebound to a
    # tensor above, so that was always False and dropout was never applied.
    # Follow the module's train()/eval() mode instead.
    decoder_input = F.dropout(decoder_input, drop_prob, training=self.training)

    result, final_state = self.rnn(decoder_input, initial_state)

    # highway / projection layers run on a flat [batch_size * seq_len, rnn_size] matrix
    result = result.contiguous().view(-1, self.params.decoder_rnn_size)
    if self.params.use_highway:
        result = self.highway(result)
    result = self.fc(result)

    if self.params.decoder_type == 'gru_emb':
        out_size = self.params.word_embed_size
    else:
        out_size = self.params.word_vocab_size
    result = result.view(batch_size, seq_len, out_size)

    return result, final_state
def forward(self, input, State):
    """
    Encode a batch of sentences one token at a time, carrying the rnn state
    across steps and recording a context vector after every token.

    :param input: [batch_size, seq_len, embed_size] tensor
    :param State: initial rnn state passed to the first step
    :return: tuple of
             - rnn output of the LAST step only,
             - context of the last step, [batch_size, 2 * encoder_rnn_size],
             - first element of the last rnn state pair,
             - second element of the last rnn state pair,
             - list with the context of every step
    """
    batch_size, seq_len, embed_size = input.size()

    # the highway layer is applied to a flat [batch_size * seq_len, embed_size] matrix
    tokens = self.hw1(input.view(-1, embed_size))
    tokens = tokens.view(batch_size, seq_len, embed_size)

    assert parameters_allocation_check(
        self
    ), "Invalid CUDA options. Parameters should be allocated in the same memory"

    num_layers = self.params.encoder_num_layers
    rnn_size = self.params.encoder_rnn_size

    contexts = []
    state = State
    for step in range(seq_len):
        # feed a single token, kept as a length-1 sequence
        token = tokens[:, step].unsqueeze(1)
        encoder_outputs, (h_0, c_0) = self.rnn(token, state)
        state = (h_0, c_0)

        # the context comes from the SECOND state element: take the last
        # layer and join its two direction slices along the feature axis
        top_layer = c_0.view(num_layers, 2, batch_size, rnn_size)[-1]
        final_state = t.cat([top_layer[0], top_layer[1]], 1)
        contexts.append(final_state)

    return encoder_outputs, final_state, h_0, c_0, contexts
def only_decoder_beam(self, decoder_input, z, drop_prob, encoder_outputs, initial_state=None):
    """
    Run one decoder pass for a batch of beam hypotheses via batch_unrolling.

    :param decoder_input: rank-3 tensor whose first axis is the beam batch
    :param z: latent context; one copy is stacked per beam hypothesis
    :param drop_prob: dropout probability applied to decoder_input
    :param encoder_outputs: accepted for interface compatibility; not read here
    :param initial_state: initial state forwarded to batch_unrolling
    :return: outputs and final state produced by self.batch_unrolling
    """
    assert parameters_allocation_check(self), \
        'Invalid CUDA options. Parameters should be allocated in the same memory'

    beam_batch_size, _, _ = decoder_input.size()

    # z is appended to every input token after dropout
    dropped_input = F.dropout(decoder_input, drop_prob)

    expanded_z = z.unsqueeze(0)
    z_per_beam = t.cat((expanded_z,) * beam_batch_size, 0)

    unroll_input = t.cat([dropped_input, z_per_beam], 2)
    outputs, state = self.batch_unrolling(unroll_input, initial_state, False)

    return outputs, state
def forward(self, decoder_input, z, drop_prob, initial_state=None):
    """
    Decode a batch of sequences with the rnn, feeding z alongside every token.

    :param decoder_input: rank-3 tensor, [batch_size, seq_len, features]
    :param z: latent context, reshaped to
           [batch_size, seq_len, latent_variable_size] after tiling
    :param drop_prob: dropout probability applied to decoder_input
    :param initial_state: initial state handed to the decoder rnn
    :return: logits with shape of [batch_size, seq_len, word_vocab_size]
             and the final state returned by the decoder rnn
    """
    assert parameters_allocation_check(self)

    batch_size, seq_len, _ = decoder_input.size()

    dropped_input = F.dropout(decoder_input, drop_prob)

    # seq_len copies of z side by side, folded into a time axis
    tiled_z = t.cat([z for _ in range(seq_len)], 1)
    tiled_z = tiled_z.view(batch_size, seq_len, self.params.latent_variable_size)

    rnn_out, final_state = self.rnn(t.cat([dropped_input, tiled_z], 2), initial_state)

    flat_out = rnn_out.contiguous().view(-1, self.params.decoder_rnn_size)
    logits = self.fc(flat_out).view(batch_size, seq_len, self.params.word_vocab_size)

    return logits, final_state
def forward(self, decoder_input, z, drop_prob):
    """
    Decode a batch of sequences with a stack of dilated 1-d convolutions.

    :param decoder_input: tensor with shape of [batch_size, seq_len, embed_size]
    :param z: sequence latent variable with shape of [batch_size, latent_variable_size]
    :param drop_prob: dropout probability applied to the concatenated
           [token embedding, z] input
    :return: unnormalized logits with shape of [batch_size, seq_len, word_vocab_size]
    """
    assert parameters_allocation_check(self), \
        'Invalid CUDA options. Parameters should be allocated in the same memory'

    batch_size, seq_len, _ = decoder_input.size()

    # z is appended to every input token
    tiled_z = t.cat([z for _ in range(seq_len)], 1)
    tiled_z = tiled_z.view(batch_size, seq_len, self.params.latent_variable_size)
    conditioned = F.dropout(t.cat([decoder_input, tiled_z], 2), drop_prob)

    # conv1d wants [batch_size, channels, width]
    features = conditioned.transpose(1, 2).contiguous()

    for layer, kernel in enumerate(self.kernels):
        pad = self.params.decoder_paddings[layer]

        features = F.conv1d(features, kernel,
                            bias=self.biases[layer],
                            dilation=self.params.decoder_dilations[layer],
                            padding=pad)

        # drop the trailing `pad` positions to perform input shifting
        kept_width = features.size()[2] - pad
        features = F.relu(features[:, :, :kept_width].contiguous())

    flat = features.transpose(1, 2).contiguous().view(-1, self.out_size)
    logits = self.fc(flat)

    return logits.view(-1, seq_len, self.params.word_vocab_size)
def only_decoder_beam(self, decoder_input, z, drop_prob, initial_state=None):
    """
    Run the decoder rnn on a batch of beam hypotheses and return its raw
    outputs (no projection to the vocabulary is applied here).

    :param decoder_input: rank-3 tensor whose first axis is the beam batch
    :param z: latent context; one copy is stacked per beam hypothesis
    :param drop_prob: dropout probability applied to decoder_input
    :param initial_state: initial state handed to the decoder rnn
    :return: rnn outputs and the final rnn state
    """
    assert parameters_allocation_check(self), \
        'Invalid CUDA options. Parameters should be allocated in the same memory'

    beam_batch_size, _, _ = decoder_input.size()

    # z is appended to every input token after dropout
    dropped_input = F.dropout(decoder_input, drop_prob)

    expanded_z = z.unsqueeze(0)
    z_per_beam = t.cat((expanded_z,) * beam_batch_size, 0)

    rnn_input = t.cat([dropped_input, z_per_beam], 2)
    outputs, state = self.rnn(rnn_input, initial_state)

    return outputs, state
def forward(self, input_labes, out_labels, num_sampled):
    """
    Negative-sampling loss, as defined in Mikolov et al., "Distributed
    Representations of Words and Phrases and their Compositionality".

    :param input_labes: Tensor with shape of [batch_size] of Long type
    :param out_labels: Tensor with shape of [batch_size] of Long type
    :param num_sampled: An int. The number of sampled from noise examples
    :return: Loss estimation with shape of [batch_size]
    """
    assert parameters_allocation_check(self), \
        (" Invalid CUDA options. out_embed and in_embed parameters both should be stored in the same memory "
         "got out_embed.is_cuda = {}, in_embed.is_cuda = {} ").format(self.out_embed.weight.is_cuda,
                                                                       self.in_embed.weight.is_cuda)

    [batch_size] = input_labes.size()

    center = self.in_embed(input_labes)
    target = self.out_embed(out_labels)

    # Draw noise classes uniformly from the whole range [0, num_classes).
    # The previous uniform_(0, num_classes - 1).long() truncated floats from
    # [0, num_classes - 1), so the last class could never be sampled.
    # Indices are created directly on the embedding's device.
    noise_idx = t.randint(0, self.num_classes, (batch_size, num_sampled),
                          device=self.out_embed.weight.device)
    noise = self.out_embed(noise_idx).neg()

    # logsigmoid is the numerically stable form of sigmoid().log(), which
    # underflows to -inf for strongly negative scores
    log_sigmoid = t.nn.functional.logsigmoid

    # [batch_size, embed_size] -> [batch_size]
    log_target = log_sigmoid((center * target).sum(1))

    # [batch_size, num_sampled, embed_size] x [batch_size, embed_size, 1]
    #   -> [batch_size, num_sampled] -> [batch_size]
    # Only the trailing axis is squeezed, so the batch axis survives even
    # when batch_size == 1.
    noise_scores = t.bmm(noise, center.unsqueeze(2)).squeeze(2)
    sum_log_sampled = log_sigmoid(noise_scores).sum(1)

    loss = log_target + sum_log_sampled

    return -loss
def forward(self, drop_prob,
            encoder_word_input=None, encoder_character_input=None,
            encoder_word_input_2=None, encoder_character_input_2=None,
            decoder_word_input_2=None, decoder_character_input_2=None,
            z=None, initial_state=None):
    """
    Two-encoder forward pass: encoder 1 reads the first sentence, encoder 2
    reads the second one starting from encoder 1's final state, z is sampled
    from encoder 2's context, and the decoder starts from encoder 1's state.
    When z is supplied both encoders are skipped.

    :param drop_prob: dropout probability forwarded to the decoder
    :param encoder_word_input: rank-2 word index tensor for encoder 1
    :param encoder_character_input: character index tensor for encoder 1
    :param encoder_word_input_2: rank-2 word index tensor for encoder 2
    :param encoder_character_input_2: character index tensor for encoder 2
    :param decoder_word_input_2: word index tensor for the decoder
    :param decoder_character_input_2: not read by this method
    :param z: latent context; when given the encoders are skipped
    :param initial_state: decoder initial state, used only when z is given
    :return: decoder output, final decoder state, kld, mu, std
             (kld, mu and std are None when z was supplied)
    """
    assert parameters_allocation_check(self)
    use_cuda = self.embedding.word_embed.weight.is_cuda

    # either the encoder inputs and decoder input, or z plus the decoder input
    has_training_inputs = z is None and fold(
        lambda acc, parameter: acc and parameter is not None,
        [encoder_word_input, encoder_character_input, decoder_word_input_2],
        True)
    has_sampling_inputs = z is not None and decoder_word_input_2 is not None
    assert has_training_inputs or has_sampling_inputs

    if z is not None:
        kld = mu = std = None
    else:
        batch_size, _ = encoder_word_input.size()
        first_sentence = self.embedding(encoder_word_input, encoder_character_input)

        # unpacking keeps the rank-2 requirement on the second word input
        _, _ = encoder_word_input_2.size()
        second_sentence = self.embedding_2(encoder_word_input_2, encoder_character_input_2)

        _, h_0, c_0 = self.encoder(first_sentence, None)
        first_state = (h_0, c_0)
        context_2, _, _ = self.encoder_2(second_sentence, first_state)

        mu = self.context_to_mu(context_2)
        logvar = self.context_to_logvar(context_2)
        std = t.exp(0.5 * logvar)

        noise = Variable(t.randn([batch_size, self.params.latent_variable_size]))
        if use_cuda:
            noise = noise.cuda()
        z = noise * std + mu

        # summed over the latent axis, averaged over the batch
        kld_per_sample = -0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1, 1)
        kld = kld_per_sample.mean().squeeze()

        # the decoder starts from the final state of encoder 1
        initial_state = first_state

    decoder_input_2 = self.embedding_2.word_embed(decoder_word_input_2)
    out, final_state = self.decoder(decoder_input_2, z, drop_prob, initial_state)

    return out, final_state, kld, mu, std
def forward(self, drop_prob,
            encoder_word_input=None, encoder_character_input=None,
            encoder_word_input_2=None, encoder_character_input_2=None,
            decoder_word_input_2=None, decoder_character_input_2=None,
            z=None, initial_state=None):
    """
    Two-encoder forward pass. With z = None both sentences are encoded
    (encoder 2 starts from encoder 1's final state), z ~ N(mu, std) is drawn
    from encoder 2's context and the decoder is started from encoder 1's
    final state. With z given, the encoders are skipped.

    :param drop_prob: dropout probability forwarded to the decoder
    :param encoder_word_input: tensor with shape of [batch_size, seq_len] of Long type
    :param encoder_character_input: tensor with shape of
           [batch_size, seq_len, max_word_len] of Long type
    :param encoder_word_input_2: rank-2 word index tensor for encoder 2
    :param encoder_character_input_2: character index tensor for encoder 2
    :param decoder_word_input_2: word index tensor for the decoder
    :param decoder_character_input_2: not read by this method
    :param z: context if sampling is performing
    :param initial_state: initial state of decoder rnn in order to perform
           sampling; overwritten by encoder 1's final state when z is None
    :return: decoder output, final decoder rnn state, kld, mu, std
             (kld, mu and std are None when z was supplied)
    """
    assert parameters_allocation_check(self), \
        'Invalid CUDA options. Parameters should be allocated in the same memory'
    use_cuda = self.embedding.word_embed.weight.is_cuda

    encoding_ready = z is None and fold(
        lambda acc, parameter: acc and parameter is not None,
        [encoder_word_input, encoder_character_input, decoder_word_input_2],
        True)
    sampling_ready = z is not None and decoder_word_input_2 is not None
    assert encoding_ready or sampling_ready, \
        "Invalid input. If z is None then encoder and decoder inputs should be passed as arguments"

    kld, mu, std = None, None, None

    if z is None:
        # get context from the encoders and sample z ~ N(mu, std)
        batch_size, _ = encoder_word_input.size()
        sentence_1 = self.embedding(encoder_word_input, encoder_character_input)

        # same embedding step for encoder 2; the unpack keeps the rank-2 requirement
        _, _ = encoder_word_input_2.size()
        sentence_2 = self.embedding_2(encoder_word_input_2, encoder_character_input_2)

        _, h_0, c_0 = self.encoder(sentence_1, None)
        encoder_1_state = (h_0, c_0)
        context_2, _, _ = self.encoder_2(sentence_2, encoder_1_state)

        mu = self.context_to_mu(context_2)
        logvar = self.context_to_logvar(context_2)
        std = t.exp(0.5 * logvar)

        eps = Variable(t.randn([batch_size, self.params.latent_variable_size]))
        if use_cuda:
            eps = eps.cuda()
        z = eps * std + mu

        # summed over the latent axis, averaged over the batch
        kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1, 1)).mean().squeeze()

        # decoder is initialised with the final state of encoder 1
        initial_state = encoder_1_state

    decoder_input_2 = self.embedding_2.word_embed(decoder_word_input_2)
    out, final_state = self.decoder(decoder_input_2, z, drop_prob, initial_state)

    return out, final_state, kld, mu, std
def forward(self,
            unk_idx: int,
            drop_prob: float,
            encoder_word_input: object = None,
            encoder_character_input: object = None,
            encoder_word_input_2: object = None,
            encoder_character_input_2: object = None,
            decoder_word_input_2: object = None,
            decoder_character_input_2: object = None,
            z: object = None,
            initial_state: tuple = None) -> tuple:
    """
    Two-encoder forward pass with per-step latent statistics.

    With z = None the original sentence is encoded first, the paraphrase is
    encoded starting from that final state, z ~ N(mu, std) is drawn from the
    paraphrase encoder and the decoder is started from the original
    encoder's final state. With z given, the encoders are skipped.

    :param unk_idx: forwarded to both embedding modules
    :param drop_prob: forwarded to both embedding modules and to the decoder
    :param encoder_word_input: An tensor with shape of [batch_size, seq_len] of Long type
    :param encoder_character_input: An tensor with shape of
           [batch_size, seq_len, max_word_len] of Long type
    :param encoder_word_input_2: rank-2 word index tensor of the paraphrase
    :param encoder_character_input_2: character index tensor of the paraphrase
    :param decoder_word_input_2: word index tensor for the decoder
    :param decoder_character_input_2: not read by this method
    :param z: context if sampling is performing
    :param initial_state: initial state of decoder rnn in order to perform
           sampling; only used when z is given
    :return: decoder output, final decoder rnn state, kld, mu, std
             (kld, mu and std are None when z was supplied; mu is stacked over
             time steps when the paraphrase encoder returns per-step contexts)
    """
    assert parameters_allocation_check(self), \
        'Invalid CUDA options. Parameters should be allocated in the same memory'
    use_cuda = self.embedding.word_embed.weight.is_cuda

    assert z is None and fold(lambda acc, parameter: acc and parameter is not None,
                              [encoder_word_input, encoder_character_input, decoder_word_input_2],
                              True) \
        or (z is not None and decoder_word_input_2 is not None), \
        "Invalid input. If z is None then encoder and decoder inputs should be passed as arguments"

    if z is None:
        # Get context from encoder and sample z ~ N(mu, std)
        [batch_size, _] = encoder_word_input.size()
        encoder_input = self.embedding(encoder_word_input, encoder_character_input,
                                       unk_idx, drop_prob)

        # same embedding step for the paraphrase encoder
        encoder_input_2 = self.embedding_2(encoder_word_input_2, encoder_character_input_2,
                                           unk_idx, drop_prob)

        _, _, h_0, c_0, _ = self.encoder_original(encoder_input, None)
        state_original = (h_0, c_0)  # final state of the original-sentence encoder

        # the paraphrase encoder continues from the original encoder's state
        enc_out_paraphrase, context_2, _, _, context_ = self.encoder_paraphrase(
            encoder_input_2, state_original)

        noise = Variable(t.randn([batch_size, self.params.latent_variable_size]))
        if use_cuda:
            noise = noise.cuda()

        if context_ is not None:
            # one (mu, logvar) pair per encoder time step
            mu_ = []
            logvar_ = []
            for entry in context_:
                mu_.append(self.context_to_mu(entry))
                logvar_.append(self.context_to_logvar(entry))

            # z is drawn from the statistics of the last step only
            std = t.exp(0.5 * logvar_[-1])
            z = noise * std + mu_[-1]

            mu = t.stack(mu_)
            logvar = t.stack(logvar_)

            # summed over every axis, then divided by the number of steps
            kld = -0.5 * t.sum(1 + logvar - mu.pow(2) - logvar.exp())
            kld = kld / mu.shape[0]
        else:
            mu = self.context_to_mu(context_2)
            logvar = self.context_to_logvar(context_2)
            std = t.exp(0.5 * logvar)
            z = noise * std + mu

            # summed over the latent axis, averaged over the batch
            kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1, 1)).mean().squeeze()
    else:
        kld = None
        mu = None
        std = None

        # Sampling path: the encoders did not run, so these names used to be
        # unbound and the decoder call below raised UnboundLocalError.
        # Fall back to the caller-supplied initial_state.
        # NOTE(review): no encoder outputs exist here, so None is passed -
        # confirm the decoder accepts that.
        enc_out_paraphrase = None
        state_original = initial_state

    decoder_input_2 = self.embedding_2.word_embed(decoder_word_input_2)
    out, final_state = self.decoder(decoder_input_2, z, drop_prob,
                                    enc_out_paraphrase, state_original)

    return out, final_state, kld, mu, std
def forward(
        self,
        unk_idx,
        drop_prob,
        encoder_word_input=None,
        encoder_character_input=None,
        encoder_word_input_2=None,
        encoder_character_input_2=None,
        decoder_word_input_2=None,
        decoder_character_input_2=None,
        z=None,
        initial_state=None,
):
    """
    Two-encoder forward pass with an optional WAE-style regulariser.

    With z = None the original sentence is encoded first, the paraphrase is
    encoded starting from that final state, a latent sample z_tilda is drawn
    from the paraphrase encoder's statistics and the decoder is started from
    the original encoder's final state. With z given, the encoders are
    skipped and z is decoded directly.

    :param unk_idx: forwarded to both embedding modules
    :param drop_prob: forwarded to both embedding modules and to the decoder
    :param encoder_word_input: An tensor with shape of [batch_size, seq_len] of Long type
    :param encoder_character_input: An tensor with shape of
           [batch_size, seq_len, max_word_len] of Long type
    :param encoder_word_input_2: rank-2 word index tensor of the paraphrase
    :param encoder_character_input_2: character index tensor of the paraphrase
    :param decoder_word_input_2: word index tensor for the decoder
    :param decoder_character_input_2: not read by this method
    :param z: context if sampling is performing
    :param initial_state: initial state of decoder rnn in order to perform
           sampling; only used when z is given
    :return: decoder output, final decoder rnn state, regularisation loss
             (None when z was supplied), mu (None when z was supplied), None
    """
    assert parameters_allocation_check(
        self
    ), "Invalid CUDA options. Parameters should be allocated in the same memory"
    use_cuda = self.embedding.word_embed.weight.is_cuda

    assert (
        z is None and fold(
            lambda acc, parameter: acc and parameter is not None,
            [encoder_word_input, encoder_character_input, decoder_word_input_2],
            True,
        ) or (z is not None and decoder_word_input_2 is not None)
    ), "Invalid input. If z is None then encoder and decoder inputs should be passed as arguments"

    if z is None:
        # Get context from encoder and sample z ~ N(mu, std).
        # The embedding joins word and character features into one vector.
        [batch_size, _] = encoder_word_input.size()
        encoder_input = self.embedding(encoder_word_input, encoder_character_input,
                                       unk_idx, drop_prob)

        # same embedding step for the paraphrase encoder
        encoder_input_2 = self.embedding_2(encoder_word_input_2, encoder_character_input_2,
                                           unk_idx, drop_prob)

        _, _, h_0, c_0, _ = self.encoder_original(encoder_input, None)
        state_original = (h_0, c_0)  # final state of the original-sentence encoder

        # the paraphrase encoder continues from the original encoder's state
        enc_out_paraphrase, context_2, _, _, context_ = self.encoder_paraphrase(
            encoder_input_2, state_original)

        z_sampled = self.sample_gaussian(batch_size)
        if use_cuda:
            z_sampled = z_sampled.cuda()

        # fourth argument of sample_z_tilda_from_posterior, per mode
        posterior_arg = 1 if self.params.wae else 0.5

        if context_ is not None:
            # one (mu, logvar) pair per encoder time step
            mu_ = []
            logvar_ = []
            for entry in context_:
                mu_.append(self.context_to_mu(entry))
                logvar_.append(self.context_to_logvar(entry))

            mu = t.stack(mu_)
            logvar = t.stack(logvar_)

            # the latent sample uses the statistics of the last step only
            z_tilda = self.sample_z_tilda_from_posterior(
                z_sampled, logvar_[-1], mu_[-1], posterior_arg)

            if self.params.wae:
                # An earlier KL over the stacked tensors was computed here and
                # immediately overwritten by the loop below; that dead
                # computation has been removed.
                # NOTE(review): the unit scale keeps the STACKED logvar shape
                # (as before), so each step's KL broadcasts over the step
                # axis - confirm whether the per-step shape was intended.
                unit_scale = t.ones_like(logvar)
                kld = 0
                for i in range(len(mu_)):
                    p = t.distributions.Normal(mu_[i], t.exp(logvar_[i]))
                    q = t.distributions.Normal(mu_[i], unit_scale)
                    kld += t.sum(t.distributions.kl_divergence(p, q))
                kld = kld / len(mu_)
            else:
                kld = 0
                for i in range(len(mu_)):
                    kld += (-0.5 * t.sum(
                        logvar_[i] - t.pow(mu_[i], 2) - t.exp(logvar_[i]) + 1,
                        1)).mean().squeeze()
                kld = kld / len(mu_)
        else:
            mu = self.context_to_mu(context_2)
            logvar = self.context_to_logvar(context_2)

            z_tilda = self.sample_z_tilda_from_posterior(
                z_sampled, logvar, mu, posterior_arg)

            if self.params.wae:
                p = t.distributions.Normal(mu, t.exp(logvar))
                q = t.distributions.Normal(mu, t.ones_like(logvar))
                kld = t.sum(t.distributions.kl_divergence(p, q))
            else:
                kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1,
                                    1)).mean().squeeze()

        # .cuda() used to be unconditional, which broke CPU-only runs
        if use_cuda:
            z_tilda = z_tilda.cuda()

        if self.params.wae:
            wasserstein_loss = self.imq_kernel(
                z_sampled, z_tilda, self.params.latent_variable_size)
            kld = 0.01 * kld + 10 * wasserstein_loss
    else:
        kld = None
        mu = None

        # Sampling path: the encoders did not run, so these names used to be
        # unbound and the decoder call below raised UnboundLocalError.
        # Decode the caller's z from the caller's initial_state.
        # NOTE(review): no encoder outputs exist here, so None is passed -
        # confirm the decoder accepts that.
        z_tilda = z
        enc_out_paraphrase = None
        state_original = initial_state

    decoder_input_2 = self.embedding_2.word_embed(decoder_word_input_2)
    out, final_state = self.decoder(decoder_input_2, z_tilda, drop_prob,
                                    enc_out_paraphrase, state_original)

    return out, final_state, kld, mu, None