Exemplo n.º 1
0
    def generate(self, data_iter, CUDA_OK):
        """Greedy-decode a single batch and print source / gold / model output.

        Args:
            data_iter: iterable of batches; only the first batch is used.
            CUDA_OK: forwarded to ``prepare_batch`` to move tensors to GPU.
        """
        abatch = next(iter(data_iter))
        src_tokens, prev_tgt_tokens, tgt_tokens = prepare_batch(
            abatch, CUDA_OK)
        with torch.no_grad():
            # softmax is strictly monotonic, so taking argmax of the raw
            # logits selects the same tokens as argmax(softmax(logits))
            # while skipping the pointless normalization pass.
            out_tokens = torch.argmax(
                self.model(src_tokens, prev_tgt_tokens), dim=-1)

        def show_src_tgt_out(src, tgt, out):
            # Print each sentence of the batch as |src / |gold / |out rows,
            # mapping token ids back through the vocabularies.
            batch_size = out.size(0)
            for b in range(batch_size):
                print('\n|src: ', end=" ")
                for i in range(src.size(1)):
                    print(self.SRC_VOCAB.itos[src[b, i]], end=' ')
                print('\n|gold: ', end=" ")
                for i in range(tgt.size(1)):
                    print(self.TGT_VOCAB.itos[tgt[b, i]], end='')
                print('\n|out: ', end=" ")
                for i in range(out.size(1)):
                    print(self.TGT_VOCAB.itos[out[b, i]], end='')
                print()

        show_src_tgt_out(src_tokens, tgt_tokens, out_tokens)
Exemplo n.º 2
0
 def _valid_epoch(self, valid_iter):
     """Run one validation pass and return the mean per-batch loss.

     Args:
         valid_iter: iterable of validation batches.

     Returns:
         Mean of the criterion loss over all batches (a 0-dim tensor).
     """
     self.model.eval()
     with torch.no_grad():
         loss_list = []
         for batch in valid_iter:
             src_tokens, prev_tgt_tokens, tgt_tokens = prepare_batch(
                 batch, CUDA_OK=self.cuda_ok)
             out = self.model(src_tokens, prev_tgt_tokens)
             loss = self.criterion(out.reshape(-1, out.size(-1)),
                                   tgt_tokens.contiguous().view(-1))
             loss_list.append(loss)
     # len(loss_list) equals the number of batches seen; counting this way
     # avoids the original len(list(iter(valid_iter))), which ran the whole
     # iterator a second time (and would exhaust a one-shot iterator).
     return sum(loss_list) / len(loss_list)
Exemplo n.º 3
0
    def _train_epoch(self, train_iter, epoch, log_interval):
        """Train the model for one epoch, logging every ``log_interval`` batches.

        Args:
            train_iter: iterable of training batches (must support ``len``).
            epoch: current epoch number, for log output only.
            log_interval: emit a log line every this many batches.
        """
        self.model.train()
        n_batches = len(train_iter)
        for i, batch in enumerate(train_iter, start=1):
            self._n_steps += 1
            self.optimizer.zero_grad()
            src_tokens, prev_tgt_tokens, tgt_tokens = prepare_batch(
                batch, use_cuda=self.use_cuda)
            model_out = self.model(src_tokens, prev_tgt_tokens)
            loss, nll_loss, n_correct, n_word = self._cal_performance(
                pred=model_out, gold=tgt_tokens)
            loss.backward()
            self.optimizer.step()

            if i % log_interval == 0:
                # Accuracy and the per-word metrics are only needed for the
                # log line, so compute them inside the branch.
                acc = n_correct / n_word
                self.logger.info(
                    'Epoch: {}, batch: [{}/{}], lr: {:.6f}, loss: {:.5f}, '
                    'ppl: {:.2f}, acc: {:.2%}, n_steps: {}'.format(
                        epoch, i, n_batches, self._get_lr(),
                        loss.item() / n_word,
                        math.exp(nll_loss.item() / n_word),
                        acc, self._n_steps))
Exemplo n.º 4
0
    def _valid_epoch(self, valid_iter):
        """Evaluate on the validation set.

        Args:
            valid_iter: iterable of validation batches.

        Returns:
            Tuple ``(loss_per_word, nll_loss_per_word, accuracy)``. All three
            are 0.0 when the iterator yields no words, instead of raising
            ZeroDivisionError as the unguarded division would.
        """
        self.model.eval()
        total_loss, total_nll_loss, total_words, correct_words = 0, 0, 0, 0

        with torch.no_grad():
            for batch in valid_iter:
                src_tokens, prev_tgt_tokens, tgt_tokens = prepare_batch(
                    batch, use_cuda=self.use_cuda)
                model_out = self.model(src_tokens, prev_tgt_tokens)
                loss, nll_loss, n_correct, n_word = self._cal_performance(
                    pred=model_out, gold=tgt_tokens)

                total_loss += loss.item()
                total_nll_loss += nll_loss.item()
                total_words += n_word
                correct_words += n_correct

        # Guard against an empty validation set (total_words == 0).
        if total_words == 0:
            return 0.0, 0.0, 0.0
        loss_per_word = total_loss / total_words
        nll_loss_per_word = total_nll_loss / total_words
        accuracy = correct_words / total_words
        return loss_per_word, nll_loss_per_word, accuracy
Exemplo n.º 5
0
    def _train_epoch(self, train_iter, epoch):
        """Train for one epoch, printing a progress line every 100 batches.

        Args:
            train_iter: iterable of training batches.
            epoch: current epoch number, for log output only.
        """
        self.model.train()
        # Prefer len() when the iterator supports it; otherwise count batches
        # with a generator. The original len(list(iter(train_iter))) walked
        # the entire epoch a second time AND materialized every batch in
        # memory just to count them.
        try:
            n_batches = len(train_iter)
        except TypeError:
            n_batches = sum(1 for _ in train_iter)
        for i, batch in enumerate(train_iter, start=1):
            self._lr_step_update()
            self.optimizer.zero_grad()
            src_tokens, prev_tgt_tokens, tgt_tokens = prepare_batch(
                batch, CUDA_OK=self.cuda_ok)
            out = self.model(src_tokens, prev_tgt_tokens)
            loss = self.criterion(out.reshape(-1, out.size(-1)),
                                  tgt_tokens.contiguous().view(-1))
            loss.backward()

            self.optimizer.step()
            if i % 100 == 0:
                print('{} | Epoch: {}, batch: [{}/{}], lr:{:.5}, loss: {:.5}'.
                      format(
                          time.strftime("%y-%m-%d %H:%M:%S", time.localtime()),
                          epoch, i, n_batches, self._get_lr(), loss.item()))
Exemplo n.º 6
0
    def generate(self,
                 src,
                 tgt,
                 data_path,
                 result_save_path,
                 batch_size=4096,
                 quiet=False):
        """Translate the test split and write -S/-T/-P triples to result.txt.

        Args:
            src: source-language file extension, without the leading dot.
            tgt: target-language file extension, without the leading dot.
            data_path: directory containing a 'test' split, or a direct
                file-prefix path.
            result_save_path: directory where 'result.txt' is written.
            batch_size: batch budget passed to SortedIterator together with
                batch_size_fn.
            quiet: when True, wrap the iterator in a tqdm progress bar and
                skip per-sentence printing.
                NOTE(review): showing tqdm only when quiet=True looks
                inverted at first glance but pairs with the suppressed
                per-sentence prints below — confirm this is intended.
        """
        exts = ('.' + src, '.' + tgt)
        # Accept either a directory (use its 'test' prefix) or a file prefix.
        test_path = data_path + '/test' if os.path.isdir(
            data_path) else data_path
        test = datasets.TranslationDataset(path=test_path,
                                           exts=exts,
                                           fields=(('src', self.dl.SRC),
                                                   ('trg', self.dl.TGT)))

        # NOTE(review): shuffle=True on a test iterator randomizes output
        # order; harmless for the self-contained -S/-T/-P triples written
        # below, but confirm downstream scoring does not rely on file order.
        test_iter = SortedIterator(test,
                                   batch_size=batch_size,
                                   device=None,
                                   repeat=False,
                                   sort_key=lambda x: (len(x.src), len(x.trg)),
                                   batch_size_fn=batch_size_fn,
                                   train=False,
                                   shuffle=True)

        result_path = result_save_path + '/result.txt'
        start_time = time.time()
        with open(result_path, 'w', encoding='utf8') as f, torch.no_grad():
            test_iter = tqdm(test_iter) if quiet else test_iter
            for batch in test_iter:
                src_tokens, _, tgt_tokens = prepare_batch(
                    batch, use_cuda=self.use_cuda)
                # beam_size > 0 selects beam search; otherwise fall back to
                # greedy decoding with the same special-token configuration.
                if self.beam_size > 0:
                    pred_tokens, _ = beam_search(
                        model=self.model,
                        src_tokens=src_tokens,
                        beam_size=self.beam_size,
                        length_penalty=self.length_penalty,
                        max_seq_len=self.max_seq_len,
                        bos=self.bos,
                        eos=self.eos,
                        src_pdx=self.src_pdx,
                        tgt_pdx=self.tgt_pdx)
                else:
                    pred_tokens = greedy_search(model=self.model,
                                                src_tokens=src_tokens,
                                                max_seq_len=self.max_seq_len,
                                                bos=self.bos,
                                                eos=self.eos,
                                                src_pdx=self.src_pdx,
                                                tgt_pdx=self.tgt_pdx)

                # Map token ids back to word lists for source, reference,
                # and prediction.
                src_sentences = de_numericalize(self.dl.SRC.vocab, src_tokens)
                tgt_sentences = de_numericalize(self.dl.TGT.vocab, tgt_tokens)
                pred_sentences = de_numericalize(self.dl.TGT.vocab,
                                                 pred_tokens)

                for src_words, tgt_words, pred_words in zip(
                        src_sentences, tgt_sentences, pred_sentences):
                    content = '-S\t{}\n-T\t{}\n-P\t{}\n\n'.format(
                        ' '.join(src_words), ' '.join(tgt_words),
                        ' '.join(pred_words))
                    f.write(content)
                    if not quiet:
                        print(content)

        print(
            'Successful. Generate time: {:.1f} min, the result has saved at {}'
            .format((time.time() - start_time) / 60, result_path))