def forward(self, inputs, input_lens=None, noise=False):
    """Encode a batch of sequences and return the last layer's final state.

    Args:
        inputs: batch-first input. It is passed through ``self.embedding``
            when one is configured; after that step it must be 3-D
            (batch, sequence, feature).
        input_lens: optional per-sequence lengths. When given, the batch is
            sorted by length and packed before it reaches the RNN, and the
            original batch order is restored afterwards.
        noise: when true and ``self.noise_radius > 0``, zero-mean Gaussian
            noise with standard deviation ``self.noise_radius`` is added to
            the encoding.

    Returns:
        A tuple ``(enc, hids)``: ``enc`` is the final hidden state of the
        last RNN layer, flattened to ``(batch_size, -1)``; ``hids`` is the
        RNN output (re-padded and un-sorted when ``input_lens`` was given).
    """
    if self.embedding is not None:
        inputs = self.embedding(inputs)

    batch_size, _, _ = inputs.size()
    inputs = F.dropout(inputs, 0.5, self.training)

    if input_lens is not None:
        # pack_padded_sequence is fed sequences in decreasing-length order.
        input_lens_sorted, indices = input_lens.sort(descending=True)
        inputs_sorted = inputs.index_select(0, indices)
        inputs = pack_padded_sequence(
            inputs_sorted, input_lens_sorted.data.tolist(), batch_first=True)

    init_hidden = gVar(
        torch.zeros(self.n_layers * (1 + self.bidirectional), batch_size, self.hidden_size))
    hids, h_n = self.rnn(inputs, init_hidden)

    if input_lens is not None:
        # Sorting the permutation yields its inverse, which undoes the
        # length-based reordering applied above.
        _, inv_indices = indices.sort()
        hids, _ = pad_packed_sequence(hids, batch_first=True)
        hids = hids.index_select(0, inv_indices)
        h_n = h_n.index_select(1, inv_indices)

    # Split the leading axis into (layer, direction) and keep the last layer.
    h_n = h_n.view(self.n_layers, (1 + self.bidirectional), batch_size, self.hidden_size)
    h_n = h_n[-1]
    enc = h_n.transpose(1, 0).contiguous().view(batch_size, -1)

    if noise and self.noise_radius > 0:
        # Positional arguments: the historical ``means=`` keyword is rejected
        # by current PyTorch releases, while the positional form is accepted
        # by both old and new versions.
        gauss_noise = gVar(torch.normal(torch.zeros(enc.size()), self.noise_radius))
        enc = enc + gauss_noise

    return enc, hids
def forward(self, context, context_lens, utt_lens, floors, noise=False):
    """Encode a batch of multi-utterance contexts into one vector each.

    Every utterance is encoded with ``self.utt_encoder``, concatenated with a
    two-way one-hot of its floor value, and the resulting sequence of
    utterance vectors is run through ``self.rnn``.

    Args:
        context: 3-D tensor unpacked as
            (batch_size, max_context_len, max_utt_len).
        context_lens: number of utterances per context; used to pack the
            utterance sequence.
        utt_lens: per-utterance lengths; flattened and handed to
            ``self.utt_encoder``.
        floors: integer tensor of 0/1 indices, one per utterance, scattered
            into a 2-column one-hot matrix.
        noise: when true and ``self.noise_radius > 0``, zero-mean Gaussian
            noise with standard deviation ``self.noise_radius`` is added.

    Returns:
        The final RNN hidden state reshaped to ``(batch_size, -1)``, in the
        original batch order.
    """
    batch_size, max_context_len, max_utt_len = context.size()

    # Encode all utterances of all contexts in a single flat batch.
    utts = context.view(-1, max_utt_len)
    utt_lens = utt_lens.view(-1)
    utt_encs, _ = self.utt_encoder(utts, utt_lens)
    utt_encs = utt_encs.view(batch_size, max_context_len, -1)

    # One-hot encode the floor index of every utterance.
    floor_one_hot = gVar(torch.zeros(floors.numel(), 2))
    floor_one_hot.data.scatter_(1, floors.view(-1, 1), 1)
    floor_one_hot = floor_one_hot.view(-1, max_context_len, 2)

    utt_floor_encs = torch.cat([utt_encs, floor_one_hot], 2)
    utt_floor_encs = F.dropout(utt_floor_encs, 0.25, self.training)

    # pack_padded_sequence is fed contexts in decreasing-length order.
    context_lens_sorted, indices = context_lens.sort(descending=True)
    utt_floor_encs = utt_floor_encs.index_select(0, indices)
    utt_floor_encs = pack_padded_sequence(
        utt_floor_encs, context_lens_sorted.data.tolist(), batch_first=True)

    init_hidden = gVar(torch.zeros(1, batch_size, self.hidden_size))
    _, h_n = self.rnn(utt_floor_encs, init_hidden)

    # Sorting the permutation yields its inverse, restoring batch order.
    _, inv_indices = indices.sort()
    h_n = h_n.index_select(1, inv_indices)
    enc = h_n.transpose(1, 0).contiguous().view(batch_size, -1)

    if noise and self.noise_radius > 0:
        # Positional arguments: the historical ``means=`` keyword is rejected
        # by current PyTorch releases, while the positional form is accepted
        # by both old and new versions.
        gauss_noise = gVar(torch.normal(torch.zeros(enc.size()), self.noise_radius))
        enc = enc + gauss_noise

    return enc
def sampling(self, init_hidden, context, maxlen, SOS_tok, EOS_tok, mode='greedy'):
    """Decode ``maxlen`` tokens per batch element, one step at a time.

    Args:
        init_hidden: initial decoder state; its first dimension is the batch
            size and it is unsqueezed to add a leading axis for the RNN.
        context: optional tensor concatenated (after ``unsqueeze(1)``) to the
            decoder input at every step; ``None`` disables this.
        maxlen: number of decoding steps (width of the returned array).
        SOS_tok: token id fed as the first decoder input.
        EOS_tok: token id that terminates a sample when counting lengths.
        mode: ``'greedy'`` takes the arg-max token at every step,
            ``'sample'`` draws from the softmax distribution.

    Returns:
        ``(decoded_words, sample_lens)``: an integer array of shape
        ``(batch_size, maxlen)`` with the generated token ids, and an integer
        array of shape ``(batch_size,)`` holding the number of tokens that
        precede the first ``EOS_tok`` in each row (``maxlen`` if none).

    Raises:
        ValueError: if ``mode`` is neither ``'greedy'`` nor ``'sample'`` and
            at least one decoding step is executed.
    """
    batch_size = init_hidden.size(0)
    # ``np.int`` was removed from NumPy; the builtin ``int`` is the same dtype.
    decoded_words = np.zeros((batch_size, maxlen), dtype=int)
    sample_lens = np.zeros(batch_size, dtype=int)

    decoder_input = gVar(torch.LongTensor([[SOS_tok] * batch_size]).view(batch_size, 1))
    if self.embedding is not None:
        decoder_input = self.embedding(decoder_input)
    if context is not None:
        decoder_input = torch.cat([decoder_input, context.unsqueeze(1)], 2)
    decoder_hidden = init_hidden.unsqueeze(0)

    for di in range(maxlen):
        decoder_output, decoder_hidden = self.rnn(decoder_input, decoder_hidden)
        decoder_output = self.out(decoder_output)
        last_step = decoder_output[:, -1]

        if mode == 'greedy':
            topi = last_step.max(1, keepdim=True)[1]
        elif mode == 'sample':
            topi = torch.multinomial(F.softmax(last_step, dim=1), 1)
        else:
            # Previously this fell through and failed on an unbound ``topi``.
            raise ValueError(
                "Unsupported sampling mode %r; expected 'greedy' or 'sample'" % (mode,))

        # The chosen token becomes the next decoder input.
        decoder_input = topi
        if self.embedding is not None:
            decoder_input = self.embedding(decoder_input)
        if context is not None:
            decoder_input = torch.cat([decoder_input, context.unsqueeze(1)], 2)

        # squeeze(1) keeps the batch axis even when batch_size == 1.
        decoded_words[:, di] = topi.squeeze(1).data.cpu().numpy()

    # Sample length = number of tokens before the first EOS in each row.
    for i in range(batch_size):
        for word in decoded_words[i]:
            if word == EOS_tok:
                break
            sample_lens[i] += 1

    return decoded_words, sample_lens
def forward(self, context):
    """Draw a latent sample conditioned on ``context``.

    ``context`` (2-D, batch first) is passed through ``self.fc``; two heads
    then produce ``mu`` and ``logsigma``. The sample is computed as
    ``z = epsilon * exp(0.5 * logsigma) + mu`` with ``epsilon`` drawn from a
    standard normal of shape ``(batch_size, self.z_size)``.

    Returns:
        The tuple ``(z, mu, logsigma)``.
    """
    # Unpacking into exactly two names requires a 2-D input.
    n_rows, _ = context.size()

    hidden = self.fc(context)
    mu = self.context_to_mu(hidden)
    logsigma = self.context_to_logsigma(hidden)

    scale = torch.exp(0.5 * logsigma)
    unit_noise = gVar(torch.randn([n_rows, self.z_size]))

    return unit_noise * scale + mu, mu, logsigma
def evaluate(model, metrics, test_loader, ivocab, vocab, repeat, PAD_token=0):
    """Run the model over every test batch and report averaged metrics.

    For each of ``test_loader.num_batch`` batches the model generates
    ``repeat`` samples; BLEU, bag-of-words similarity and distinct-n scores
    are computed through ``metrics`` and averaged over all batches. Every
    2000th batch the context, reference and samples are written to the log.

    Args:
        model: object exposing ``eval()``, ``train()`` and ``sample(...)``.
        metrics: object exposing ``sim_bleu``, ``sim_bow`` and
            ``div_distinct``.
        test_loader: object exposing ``num_batch`` and ``next_batch()``.
        ivocab: mapping passed to ``indexes2sent`` and indexed with
            ``"<s>"`` / ``"</s>"`` to obtain the special token ids.
        vocab: unused here; kept for interface compatibility.
        repeat: number of samples requested from ``model.sample``.
        PAD_token: token id ignored when rendering contexts and samples.

    Returns:
        A 10-tuple of floats, in this order: ``recall_bleu, prec_bleu,
        bow_extrema, bow_avg, bow_greedy, intra_dist1, intra_dist2, avg_len,
        inter_dist1, inter_dist2``. Note that this order differs from the
        order used in the printed report.

    Side effects:
        Switches the model to eval mode for the run and to train mode before
        returning; prints and logs the report.
    """
    recall_bleus, prec_bleus = [], []
    bows_extrema, bows_avg, bows_greedy = [], [], []
    intra_dist1s, intra_dist2s = [], []
    inter_dist1s, inter_dist2s = [], []
    avg_lens = []

    local_t = 0
    model.eval()
    for _bat in tqdm(range(test_loader.num_batch)):
        batch = test_loader.next_batch()
        local_t += 1
        should_log = local_t % 2000 == 0

        context, context_lens, utt_lens, floors, _, _, _, response, res_lens, _ = batch
        # remove the sos token in the context and reduce the context length
        context, utt_lens = context[:, :, 1:], utt_lens - 1

        if should_log:
            logging.info("Batch %d \n" % (local_t))
            # Log at most the last five turns (by context length) of the
            # first dialogue; rendering is skipped entirely when not logging.
            start = np.maximum(0, context_lens[0] - 5)
            for t_id in range(start, context.shape[1], 1):
                # indexes2sent returns a pair (as its other call sites show);
                # only the rendered string belongs in the log line.
                context_str, _ = indexes2sent(
                    context[0, t_id], ivocab, ivocab["</s>"], PAD_token)
                logging.info("Context %d-%d: %s\n" % (t_id, floors[0, t_id], context_str))

        # Reference response of the first dialogue in the batch.
        ref_str, _ = indexes2sent(response[0], ivocab, ivocab["</s>"], ivocab["<s>"])
        ref_tokens = ref_str.split(' ')
        if should_log:
            logging.info("Target >> %s\n" % (ref_str.replace(" ' ", "'")))

        context, context_lens, utt_lens, floors = gVar(context), gVar(
            context_lens), gVar(utt_lens), gData(floors)
        sample_words, sample_lens = model.sample(
            context, context_lens, utt_lens, floors, repeat, ivocab["<s>"], ivocab["</s>"])
        # nparray: [repeat x seq_len]
        pred_sents, _ = indexes2sent(sample_words, ivocab, ivocab["</s>"], PAD_token)
        pred_tokens = [sent.split(' ') for sent in pred_sents]
        if should_log:
            for r_id, pred_sent in enumerate(pred_sents):
                logging.info("Sample %d >> %s\n" % (r_id, pred_sent.replace(" ' ", "'")))

        max_bleu, avg_bleu = metrics.sim_bleu(pred_tokens, ref_tokens)
        recall_bleus.append(max_bleu)
        prec_bleus.append(avg_bleu)

        # The reference drops its first token and shortens lengths by two
        # (presumably stripping <s> and </s> -- confirm against the loader).
        bow_extrema, bow_avg, bow_greedy = metrics.sim_bow(
            sample_words, sample_lens, response[:, 1:], res_lens - 2)
        bows_extrema.append(bow_extrema)
        bows_avg.append(bow_avg)
        bows_greedy.append(bow_greedy)

        intra_dist1, intra_dist2, inter_dist1, inter_dist2 = metrics.div_distinct(
            sample_words, sample_lens)
        intra_dist1s.append(intra_dist1)
        intra_dist2s.append(intra_dist2)
        avg_lens.append(np.mean(sample_lens))
        inter_dist1s.append(inter_dist1)
        inter_dist2s.append(inter_dist2)

    recall_bleu = float(np.mean(recall_bleus))
    prec_bleu = float(np.mean(prec_bleus))
    # The small constant guards against division by zero.
    f1 = 2 * (prec_bleu * recall_bleu) / (prec_bleu + recall_bleu + 10e-12)
    bow_extrema = float(np.mean(bows_extrema))
    bow_avg = float(np.mean(bows_avg))
    bow_greedy = float(np.mean(bows_greedy))
    intra_dist1 = float(np.mean(intra_dist1s))
    intra_dist2 = float(np.mean(intra_dist2s))
    avg_len = float(np.mean(avg_lens))
    inter_dist1 = float(np.mean(inter_dist1s))
    inter_dist2 = float(np.mean(inter_dist2s))

    report = "Avg recall BLEU %f, avg precision BLEU %f, F1 %f, \nbow_avg %f, bow_extrema %f, bow_greedy %f, \n" \
             "intra_dist1 %f, intra_dist2 %f, inter_dist1 %f, inter_dist2 %f, \navg_len %f" \
             % (recall_bleu, prec_bleu, f1, bow_avg, bow_extrema, bow_greedy,
                intra_dist1, intra_dist2, inter_dist1, inter_dist2, avg_len)
    print(report)
    logging.info(report + "\n")
    print("Done testing")

    model.train()

    return (recall_bleu, prec_bleu, bow_extrema, bow_avg, bow_greedy,
            intra_dist1, intra_dist2, avg_len, inter_dist1, inter_dist2)
n_iters = train_loader.num_batch / max(1, config['n_iters_d']) itr = 1 pbar = tqdm(range(train_loader.num_batch)) for bat in pbar: model.train() loss_records = [] batch = train_loader.next_batch() if bat == train_loader.num_batch: break # end of epoch context, context_lens, utt_lens, floors, _, _, _, response, res_lens, _ = batch # remove the sos token in the context and reduce the context length context, utt_lens = context[:, :, 1:], utt_lens - 1 context, context_lens, utt_lens, floors, response, res_lens \ = gVar(context), gVar(context_lens), gVar(utt_lens), gData(floors), gVar(response), gVar(res_lens) loss_AE = model.train_AE(context, context_lens, utt_lens, floors, response, res_lens) loss_records.extend(loss_AE) loss_G = model.train_G(context, context_lens, utt_lens, floors, response, res_lens) loss_records.extend(loss_G) for i in range(config['n_iters_d']): # train discriminator/critic loss_D = model.train_D(context, context_lens, utt_lens, floors, response, res_lens) if i == 0: loss_records.extend(loss_D) if i == config['n_iters_d'] - 1: