Example #1
    def forward(self, inputs, input_lens=None, noise=False):
        if self.embedding is not None:
            inputs = self.embedding(inputs)

        batch_size, seq_len, emb_size = inputs.size()
        inputs = F.dropout(inputs, 0.5, self.training)

        if input_lens is not None:
            input_lens_sorted, indices = input_lens.sort(descending=True)
            inputs_sorted = inputs.index_select(0, indices)
            inputs = pack_padded_sequence(inputs_sorted,
                                          input_lens_sorted.tolist(),
                                          batch_first=True)

        init_hidden = gVar(
            torch.zeros(self.n_layers * (1 + self.bidirectional), batch_size,
                        self.hidden_size))
        hids, h_n = self.rnn(inputs, init_hidden)
        if input_lens is not None:
            _, inv_indices = indices.sort()
            hids, lens = pad_packed_sequence(hids, batch_first=True)
            hids = hids.index_select(0, inv_indices)
            h_n = h_n.index_select(1, inv_indices)
        h_n = h_n.view(self.n_layers, (1 + self.bidirectional), batch_size,
                       self.hidden_size)
        h_n = h_n[-1]
        enc = h_n.transpose(1, 0).contiguous().view(batch_size, -1)
        if noise and self.noise_radius > 0:
            gauss_noise = gVar(
                torch.normal(mean=torch.zeros(enc.size()),
                             std=self.noise_radius))
            enc = enc + gauss_noise

        return enc, hids
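Note: the sort / index_select / inverse-sort dance above exists because pack_padded_sequence required length-sorted batches before PyTorch 1.1 (which added enforce_sorted=False). A minimal sketch of the round-trip with toy tensors (names here are illustrative, not from the example):

import torch

lens = torch.tensor([3, 5, 2])
x = torch.arange(3).unsqueeze(1)            # stand-in for a [batch, ...] tensor

lens_sorted, indices = lens.sort(descending=True)
x_sorted = x.index_select(0, indices)       # batch reordered by decreasing length

_, inv_indices = indices.sort()             # sorting the permutation yields its inverse
x_restored = x_sorted.index_select(0, inv_indices)
assert torch.equal(x, x_restored)           # original batch order recovered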
Example #2
    def forward(self, context, context_lens, utt_lens, floors, noise=False):
        batch_size, max_context_len, max_utt_len = context.size()
        utts = context.view(-1, max_utt_len)
        utt_lens = utt_lens.view(-1)
        utt_encs, _ = self.utt_encoder(utts, utt_lens)
        utt_encs = utt_encs.view(batch_size, max_context_len, -1)

        floor_one_hot = gVar(torch.zeros(floors.numel(), 2))
        floor_one_hot.data.scatter_(1, floors.view(-1, 1), 1)
        floor_one_hot = floor_one_hot.view(-1, max_context_len, 2)
        utt_floor_encs = torch.cat([utt_encs, floor_one_hot], 2)

        utt_floor_encs = F.dropout(utt_floor_encs, 0.25, self.training)
        context_lens_sorted, indices = context_lens.sort(descending=True)
        utt_floor_encs = utt_floor_encs.index_select(0, indices)
        utt_floor_encs = pack_padded_sequence(
            utt_floor_encs,
            context_lens_sorted.tolist(),
            batch_first=True)

        init_hidden = gVar(torch.zeros(1, batch_size, self.hidden_size))
        hids, h_n = self.rnn(utt_floor_encs, init_hidden)

        _, inv_indices = indices.sort()
        h_n = h_n.index_select(1, inv_indices)

        enc = h_n.transpose(1, 0).contiguous().view(batch_size, -1)

        if noise and self.noise_radius > 0:
            gauss_noise = gVar(
                torch.normal(mean=torch.zeros(enc.size()),
                             std=self.noise_radius))
            enc = enc + gauss_noise
        return enc
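The scatter_ call above is the usual one-hot trick: write 1 into column floors[i] of row i. A standalone sketch of the same pattern, with a made-up floors tensor:

import torch

floors = torch.tensor([[0, 1, 1], [1, 0, 0]])      # [batch, context_len], binary speaker flags
one_hot = torch.zeros(floors.numel(), 2)
one_hot.scatter_(1, floors.view(-1, 1), 1)         # one_hot[i, floors_flat[i]] = 1
one_hot = one_hot.view(-1, floors.size(1), 2)      # back to [batch, context_len, 2]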
Example #3
    def sampling(self, init_hidden, context, maxlen, SOS_tok, EOS_tok, mode='greedy'):
        batch_size = init_hidden.size(0)
        decoded_words = np.zeros((batch_size, maxlen), dtype=np.int64)
        sample_lens = np.zeros(batch_size, dtype=np.int64)

        decoder_input = gVar(torch.LongTensor([[SOS_tok] * batch_size]).view(batch_size, 1))
        decoder_input = self.embedding(decoder_input) if self.embedding is not None else decoder_input
        decoder_input = torch.cat([decoder_input, context.unsqueeze(1)], 2) if context is not None else decoder_input
        decoder_hidden = init_hidden.unsqueeze(0)
        for di in range(maxlen):
            decoder_output, decoder_hidden = self.rnn(decoder_input, decoder_hidden)
            decoder_output = self.out(decoder_output)
            if mode == 'greedy':
                topi = decoder_output[:, -1].max(1, keepdim=True)[1]
            elif mode == 'sample':
                topi = torch.multinomial(F.softmax(decoder_output[:, -1], dim=1), 1)
            else:
                raise ValueError("unknown sampling mode: %s" % mode)
            decoder_input = self.embedding(topi) if self.embedding is not None else topi
            decoder_input = torch.cat([decoder_input, context.unsqueeze(1)],
                                      2) if context is not None else decoder_input
            ni = topi.squeeze().data.cpu().numpy()
            decoded_words[:, di] = ni

        for i in range(batch_size):
            for word in decoded_words[i]:
                if word == EOS_tok:
                    break
                sample_lens[i] = sample_lens[i] + 1
        return decoded_words, sample_lens
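The two modes differ only in how the next token id is chosen from the final-step logits: 'greedy' takes the argmax, 'sample' draws from the softmax distribution. A minimal sketch with a fake logit row:

import torch
import torch.nn.functional as F

logits = torch.tensor([[0.1, 2.0, -1.0]])                     # [batch=1, vocab=3]
greedy_tok = logits.max(1, keepdim=True)[1]                   # deterministic: always index 1
sampled_tok = torch.multinomial(F.softmax(logits, dim=1), 1)  # stochastic draw, shape [1, 1]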
Example #4
    def forward(self, context):
        batch_size, _ = context.size()
        context = self.fc(context)
        mu = self.context_to_mu(context)
        logsigma = self.context_to_logsigma(context)
        std = torch.exp(0.5 * logsigma)

        epsilon = gVar(torch.randn([batch_size, self.z_size]))
        z = epsilon * std + mu
        return z, mu, logsigma
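This is the standard VAE reparameterization trick: z = mu + sigma * epsilon keeps the sample differentiable with respect to mu and logsigma (here logsigma is the log-variance, since std = exp(0.5 * logsigma)). The usual companion loss, not shown in the example, is the KL term against a unit Gaussian prior; a sketch:

import torch

def kl_unit_gaussian(mu, logsigma):
    # KL( N(mu, sigma^2) || N(0, I) ), summed over latent dims, averaged over the batch
    return torch.mean(-0.5 * torch.sum(1 + logsigma - mu.pow(2) - logsigma.exp(), dim=1))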
Example #5
def evaluate(model, metrics, test_loader, ivocab, vocab, repeat, PAD_token=0):
    recall_bleus, prec_bleus, bows_extrema, bows_avg, bows_greedy, intra_dist1s, intra_dist2s, \
    avg_lens, inter_dist1s, inter_dist2s = [], [], [], [], [], [], [], [], [], []
    local_t = 0

    model.eval()
    pbar = tqdm(range(test_loader.num_batch))

    for bat in pbar:
        batch = test_loader.next_batch()
        if bat == test_loader.num_batch: break  # end of epoch

        local_t += 1

        context, context_lens, utt_lens, floors, _, _, _, response, res_lens, _ = batch
        # remove the sos token in the context and reduce the context length
        context, utt_lens = context[:, :, 1:], utt_lens - 1

        if local_t % 2000 == 0:
            logging.info("Batch %d \n" % (local_t))  # print the context

        start = np.maximum(0, context_lens[0] - 5)
        for t_id in range(start, context.shape[1], 1):
            context_str = indexes2sent(context[0, t_id], ivocab,
                                       ivocab["</s>"], PAD_token)
            if local_t % 2000 == 0:
                logging.info("Context %d-%d: %s\n" %
                             (t_id, floors[0, t_id], context_str))
        # print the true outputs
        ref_str, _ = indexes2sent(response[0], ivocab, ivocab["</s>"],
                                  ivocab["<s>"])
        ref_tokens = ref_str.split(' ')

        if local_t % 2000 == 0:
            logging.info("Target >> %s\n" % (ref_str.replace(" ' ", "'")))

        context, context_lens, utt_lens, floors = gVar(context), gVar(
            context_lens), gVar(utt_lens), gData(floors)
        sample_words, sample_lens = model.sample(context, context_lens,
                                                 utt_lens, floors, repeat,
                                                 ivocab["<s>"], ivocab["</s>"])
        # nparray: [repeat x seq_len]
        pred_sents, _ = indexes2sent(sample_words, ivocab, ivocab["</s>"],
                                     PAD_token)
        pred_tokens = [sent.split(' ') for sent in pred_sents]
        for r_id, pred_sent in enumerate(pred_sents):
            if local_t % 2000 == 0:
                logging.info("Sample %d >> %s\n" %
                             (r_id, pred_sent.replace(" ' ", "'")))

        max_bleu, avg_bleu = metrics.sim_bleu(pred_tokens, ref_tokens)
        recall_bleus.append(max_bleu)
        prec_bleus.append(avg_bleu)

        bow_extrema, bow_avg, bow_greedy = metrics.sim_bow(
            sample_words, sample_lens, response[:, 1:], res_lens - 2)
        bows_extrema.append(bow_extrema)
        bows_avg.append(bow_avg)
        bows_greedy.append(bow_greedy)

        intra_dist1, intra_dist2, inter_dist1, inter_dist2 = metrics.div_distinct(
            sample_words, sample_lens)
        intra_dist1s.append(intra_dist1)
        intra_dist2s.append(intra_dist2)
        avg_lens.append(np.mean(sample_lens))
        inter_dist1s.append(inter_dist1)
        inter_dist2s.append(inter_dist2)

    recall_bleu = float(np.mean(recall_bleus))
    prec_bleu = float(np.mean(prec_bleus))
    f1 = 2 * (prec_bleu * recall_bleu) / (prec_bleu + recall_bleu + 10e-12)

    bow_extrema = float(np.mean(bows_extrema))
    bow_avg = float(np.mean(bows_avg))
    bow_greedy = float(np.mean(bows_greedy))

    intra_dist1 = float(np.mean(intra_dist1s))
    intra_dist2 = float(np.mean(intra_dist2s))

    avg_len = float(np.mean(avg_lens))

    inter_dist1 = float(np.mean(inter_dist1s))
    inter_dist2 = float(np.mean(inter_dist2s))

    report = "Avg recall BLEU %f, avg precision BLEU %f, F1 %f, \nbow_avg %f, bow_extrema %f, bow_greedy %f, \n" \
             "intra_dist1 %f, intra_dist2 %f, inter_dist1 %f, inter_dist2 %f, \navg_len %f" \
             % (recall_bleu, prec_bleu, f1, bow_avg, bow_extrema, bow_greedy, intra_dist1, intra_dist2,
                inter_dist1, inter_dist2, avg_len)
    print(report)
    logging.info(report + "\n")
    print("Done testing")

    model.train()

    return recall_bleu, prec_bleu, bow_extrema, bow_avg, bow_greedy, intra_dist1, intra_dist2, avg_len, inter_dist1, inter_dist2
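metrics.div_distinct above reports the distinct-1/distinct-2 diversity metrics: the ratio of unique n-grams to total n-grams, computed within each sampled response (intra) and across the sampled responses (inter). A minimal sketch of the inter-sample variant, with a hypothetical helper name:

def distinct_n(sentences, n):
    # ratio of unique n-grams to total n-grams across all token lists
    ngrams = [tuple(sent[i:i + n]) for sent in sentences
              for i in range(len(sent) - n + 1)]
    return len(set(ngrams)) / max(1, len(ngrams))

# e.g. distinct_n([["a", "b", "a"], ["a", "b"]], 1) == 2 / 5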
Example #6
    n_iters = train_loader.num_batch // max(1, config['n_iters_d'])

    itr = 1
    pbar = tqdm(range(train_loader.num_batch))

    for bat in pbar:
        model.train()
        loss_records = []
        batch = train_loader.next_batch()
        if bat == train_loader.num_batch: break  # end of epoch

        context, context_lens, utt_lens, floors, _, _, _, response, res_lens, _ = batch
        # remove the sos token in the context and reduce the context length
        context, utt_lens = context[:, :, 1:], utt_lens - 1
        context, context_lens, utt_lens, floors, response, res_lens \
            = gVar(context), gVar(context_lens), gVar(utt_lens), gData(floors), gVar(response), gVar(res_lens)

        loss_AE = model.train_AE(context, context_lens, utt_lens, floors,
                                 response, res_lens)
        loss_records.extend(loss_AE)

        loss_G = model.train_G(context, context_lens, utt_lens, floors,
                               response, res_lens)
        loss_records.extend(loss_G)

        for i in range(config['n_iters_d']):  # train discriminator/critic
            loss_D = model.train_D(context, context_lens, utt_lens, floors,
                                   response, res_lens)
            if i == 0:
                loss_records.extend(loss_D)
            if i == config['n_iters_d'] - 1:
                break  # last critic step for this batch (assumed; the source snippet ends here)