Example #1
    def validation(self):
        self.model.eval()
        all_prediction, all_ys = [], []
        gold_transcripts = []
        total_loss = 0.
        for step, data in enumerate(self.dev_loader):
            bos = self.vocab['<BOS>']
            eos = self.vocab['<EOS>']
            pad = self.vocab['<PAD>']
            xs, ilens, ys, ys_in, ys_out, _, _, trans = to_gpu(
                data, bos, eos, pad)
            # decode; the model returns per-step log-probabilities and predictions
            log_probs, prediction, attns, _, _ = \
                self.model(xs, ilens, None,
                           max_dec_timesteps=self.config['max_dec_timesteps'])
            # mask out positions beyond each target length (+1 for the <EOS> token)
            seq_len = [y.size(0) + 1 for y in ys]
            mask = cc(_seq_mask(seq_len=seq_len, max_len=log_probs.size(1)))
            # negative log-likelihood averaged over the valid target positions
            loss = (-torch.sum(log_probs * mask)) / sum(seq_len)
            total_loss += loss.item()

            # collect hypotheses and references for CER computation
            all_prediction = all_prediction + prediction.cpu().numpy().tolist()
            all_ys = all_ys + [y.cpu().numpy().tolist() for y in ys]
            gold_transcripts += trans

        # average the loss over batches and convert index sequences back to text
        avg_loss = total_loss / len(self.dev_loader)
        cer, prediction_sents, ground_truth_sents = self.ind2sent(
            all_prediction, all_ys)
        # switch back to training mode before returning
        self.model.train()
        return avg_loss, cer, prediction_sents, ground_truth_sents
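
The validation loop above relies on two project helpers, _seq_mask and cc, whose definitions are not shown here (they also appear in the examples below). The following is a minimal sketch of what they might look like, assuming _seq_mask returns a float mask of shape (batch, max_len) with ones on valid positions and cc simply moves a tensor to the GPU when one is available:

import torch

def _seq_mask(seq_len, max_len):
    # 1.0 for positions before each sequence's length, 0.0 for padding.
    lengths = torch.tensor(seq_len, dtype=torch.long).unsqueeze(1)    # (batch, 1)
    positions = torch.arange(max_len, dtype=torch.long).unsqueeze(0)  # (1, max_len)
    return (positions < lengths).float()                              # (batch, max_len)

def cc(tensor):
    # Move the tensor to the GPU if one is available; otherwise keep it on CPU.
    return tensor.cuda() if torch.cuda.is_available() else tensor
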
Example #2
 def mask_and_cal_sum(self, log_probs, ys, mask=None):
     if mask is None:
         # extend each target length by 1 (<EOS>) plus 4 extra positions
         seq_len = [y.size(0) + 1 + 4 for y in ys]
         mask = cc(_seq_mask(seq_len=seq_len, max_len=log_probs.size(1)))
     else:
         seq_len = [y.size(0) for y in ys]
     # masked sum of log-probabilities, divided by total length
     # (note: not negated, unlike mask_and_cal_loss in Example #3)
     loss = torch.sum(log_probs * mask) / sum(seq_len)
     return loss
Example #3
 def mask_and_cal_loss(self, log_probs, ys, mask=None):
     # mask has shape (batch, max_len)
     # add 1 for the <EOS> token
     if mask is None:
         seq_len = [y.size(0) + 1 for y in ys]
         mask = cc(_seq_mask(seq_len=seq_len, max_len=log_probs.size(1)))
     else:
         seq_len = [y.size(0) for y in ys]
     # divide by total length
     loss = -torch.sum(log_probs * mask) / sum(seq_len)
     return loss
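
As a quick sanity check of the masked loss in Examples #2 and #3: if every valid step is assigned probability 0.5, the averaged negative log-likelihood should come out to -log(0.5) ≈ 0.693. The standalone snippet below verifies this with a hand-built mask standing in for _seq_mask:

import torch

# Two target sequences of 4 and 5 tokens, each followed by one <EOS> step,
# padded to max_len = 6; every valid step gets log-probability log(0.5).
log_probs = torch.full((2, 6), 0.5).log()
seq_len = [4 + 1, 5 + 1]                      # +1 for the <EOS> token

mask = torch.zeros(2, 6)
for i, length in enumerate(seq_len):
    mask[i, :length] = 1.0                    # what _seq_mask is assumed to produce

loss = -torch.sum(log_probs * mask) / sum(seq_len)
print(loss.item())                            # ~0.6931 == -log(0.5)
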
Example #4
    def validation(self):
        # switch every sub-module to evaluation mode
        self.encoder.eval()
        self.encoder2.eval()
        self.decoder.eval()
        self.attention.eval()
        self.disen_clean.eval()
        self.disen_nuisance.eval()
        self.reconstructor.eval()
        all_prediction, all_ys = [], []
        gold_transcripts = []
        total_loss = 0.
        for step, data in enumerate(self.dev_loader):
            bos = self.vocab['<BOS>']
            eos = self.vocab['<EOS>']
            pad = self.vocab['<PAD>']
            xs, ilens, ys, ys_in, ys_out, _, _, trans = to_gpu(
                data, bos, eos, pad)

            # encode the inputs into a clean representation
            clean_repre, enc_lens, _, _ = self.encoder(xs, ilens)

            # decode from the clean representation
            logits, log_probs, prediction, attns = \
                self.decoder(clean_repre, enc_lens, None,
                             max_dec_timesteps=self.config['max_dec_timesteps'])

            seq_len = [y.size(0) + 1 for y in ys]
            mask = cc(_seq_mask(seq_len=seq_len, max_len=log_probs.size(1)))
            loss = (-torch.sum(log_probs * mask)) / sum(seq_len)
            total_loss += loss.item()

            all_prediction = all_prediction + prediction.cpu().numpy().tolist()
            all_ys = all_ys + [y.cpu().numpy().tolist() for y in ys]
            gold_transcripts += trans

        # average the loss over batches and compute the character error rate
        avg_loss = total_loss / len(self.dev_loader)
        cer, prediction_sents, ground_truth_sents = self.ind2sent(
            all_prediction, all_ys)

        # restore training mode on every sub-module
        self.encoder.train()
        self.encoder2.train()
        self.decoder.train()
        self.attention.train()
        self.disen_clean.train()
        self.disen_nuisance.train()
        self.reconstructor.train()
        return avg_loss, cer, prediction_sents, ground_truth_sents
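
The seven paired .eval()/.train() calls in Example #4 could be factored into a small context manager. The sketch below is a hypothetical refactoring, not part of the original code, and additionally disables gradient tracking for the duration of the validation pass:

import torch
from contextlib import contextmanager

@contextmanager
def evaluation_mode(*modules):
    # Put every module into eval() for the duration of the block and restore
    # train() afterwards, with gradient tracking disabled throughout.
    for m in modules:
        m.eval()
    try:
        with torch.no_grad():
            yield
    finally:
        for m in modules:
            m.train()

# Hypothetical usage inside validation():
#
#     with evaluation_mode(self.encoder, self.encoder2, self.decoder,
#                          self.attention, self.disen_clean,
#                          self.disen_nuisance, self.reconstructor):
#         ...  # run the dev-set loop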