Example #1
    def train_lm(self, epoch):
        self.model.train()

        epoch_loss = 0
        for batch_idx, feature in enumerate(self.train_iter):
            start_time = time.time()

            self.optimizer.zero_grad()

            utils.feature_to_device(feature, self.device)

            out = self.model(feature)
            loss = self.out_loss_fn(out.view(-1, out.shape[-1]),
                                    feature.y.view(-1))
            # utils.print_backward_graph(loss)
            loss.backward()

            nn.utils.clip_grad_norm_(self.model.parameters(),
                                     self.args.clip_grad)
            self.optimizer.step()

            iloss = loss.item()
            epoch_loss += iloss

            end_time = time.time()
            secs = end_time - start_time
            self.logger.info(
                f'Step {batch_idx+1}/{epoch+1:02} | Train Loss: {iloss:.3f} | Train PPL: {math.exp(iloss):7.3f} | Time: {secs:.3f}s\n'
            )

        return epoch_loss / len(self.train_iter)
Example #2
    def train(self, epoch, data_iter=None):
        self.model.train()

        if data_iter is None:
            data_iter = self.train_iter

        epoch_loss = 0
        for batch_idx, feature in enumerate(data_iter):
            start_time = time.time()

            utils.feature_to_device(feature, self.device)

            # out, out_lm = torch_cp.checkpoint(self.model, feature)
            out, out_lm = self.model(feature)
            loss, loss_lm = models.AR.loss(self.args.auxiliary_task,
                                           self.out_loss_fn, out, out_lm,
                                           feature.resp, feature.lm.y)
            if self.args.auxiliary_task is not None:
                loss = loss + self.args.alpha * loss_lm
            if self.args.gradient_accumulation > 1:
                loss = loss / self.args.gradient_accumulation
                # accuracy = accuracy / self.args.gradient_accumulation
            # utils.print_backward_graph(loss)
            loss.backward()

            iloss = loss.item()
            epoch_loss += iloss

            if self.args.clip_grad is not None:
                nn.utils.clip_grad_norm_(self.model.parameters(),
                                         self.args.clip_grad)

            # self.grad_util.collect(self.model)

            if (batch_idx + 1) % self.args.gradient_accumulation == 0:
                self.optimizer.step()
                self.optimizer.zero_grad()

                if self.args.use_scheduler:
                    self.scheduler.step(iloss)

                end_time = time.time()
                secs = end_time - start_time
                self.logger.info(
                    f'Step {batch_idx+1}/{epoch+1:02} | Train Loss: {iloss:.3f} | Train PPL: {math.exp(iloss):7.3f} | Time: {secs:.3f}s\n'
                )

        return epoch_loss / len(data_iter)
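Example #2 scales the loss by gradient_accumulation and only steps the optimizer once every that many batches. A minimal standalone sketch of the same pattern, assuming a toy model and synthetic data (none of these names come from the trainer above):

import torch
from torch import nn

# Toy setup: a linear classifier trained with gradient accumulation.
model = nn.Linear(8, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
loss_fn = nn.CrossEntropyLoss()
accum_steps = 4  # optimizer steps once every 4 batches

optimizer.zero_grad()
for step in range(16):
    x = torch.randn(3, 8)
    y = torch.randint(0, 2, (3,))
    # Scale the loss so the accumulated gradient matches the mean over
    # the larger effective batch (3 * accum_steps samples).
    loss = loss_fn(model(x), y) / accum_steps
    loss.backward()
    if (step + 1) % accum_steps == 0:
        optimizer.step()
        optimizer.zero_grad()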
Example #3
    def eval(self, data_iter):
        self.model.eval()

        epoch_loss = 0
        with torch.no_grad():
            for feature in data_iter:

                utils.feature_to_device(feature, self.device)

                out, out_lm = self.model(feature)
                loss, loss_lm = models.AR.loss(self.args.auxiliary_task,
                                               self.out_loss_fn, out, out_lm,
                                               feature.resp, feature.lm.y)
                if self.args.auxiliary_task is not None:
                    loss = loss + self.args.alpha * loss_lm

                epoch_loss += loss.item()

        return epoch_loss / len(data_iter)
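The train and eval methods above are presumably driven by an outer epoch loop. A hypothetical sketch of such a driver (trainer, valid_iter, n_epochs and the checkpoint path are assumptions for illustration, not defined in these examples):

import torch

best_valid_loss = float('inf')
for epoch in range(n_epochs):
    train_loss = trainer.train(epoch)
    valid_loss = trainer.eval(valid_iter)
    # Keep the checkpoint with the lowest validation loss.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(trainer.model.state_dict(), 'best_model.pt')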
Example #4
    def train(self, epoch, data_iter=None):
        self.model.train()

        if data_iter is None:
            data_iter = self.train_iter

        epoch_loss = 0
        for batch_idx, feature in enumerate(data_iter):
            start_time = time.time()

            self.optimizer.zero_grad()

            utils.feature_to_device(feature, self.device)

            out, out_lm = self.model(feature)
            loss, loss_lm = models.AR.loss(self.out_loss_fn, out, out_lm,
                                           feature.resp, feature.lm.y)
            loss = loss + self.args.alpha * loss_lm

            # utils.print_backward_graph(loss)
            loss.backward()

            nn.utils.clip_grad_norm_(self.model.parameters(),
                                     self.args.clip_grad)
            self.grad_util.collect(self.model)

            self.optimizer.step()

            iloss = loss.item()
            epoch_loss += iloss

            end_time = time.time()
            secs = end_time - start_time
            self.logger.info(
                f'Step {batch_idx+1}/{epoch+1:02} | Train Loss: {iloss:.3f} | Train PPL: {math.exp(iloss):7.3f} | Time: {secs:.3f}s\n'
            )

        return epoch_loss / len(data_iter)
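Example #4 calls self.grad_util.collect(self.model) right after clipping, but the collector itself is not shown. A minimal sketch of what such a gradient collector might look like (this GradUtil is a hypothetical stand-in, not the class used above):

import torch
from torch import nn
from collections import defaultdict

class GradUtil:
    # Hypothetical gradient collector: records the L2 norm of each
    # parameter's gradient every time collect() is called.
    def __init__(self):
        self.norms = defaultdict(list)

    def collect(self, model):
        for name, p in model.named_parameters():
            if p.grad is not None:
                self.norms[name].append(p.grad.norm().item())

# Tiny usage example with a toy model.
grad_util = GradUtil()
m = nn.Linear(4, 2)
m(torch.randn(1, 4)).sum().backward()
grad_util.collect(m)
print(dict(grad_util.norms))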
Example #5
    def run(self):
        self.model.eval()

        total_bleu = 0
        total_f1 = 0
        total_dist1 = 0
        total_dist2 = 0
        total_loss = 0

        print('Run eval...')
        with torch.no_grad():
            for batch_idx, feature in enumerate(self.test_iter):
                utils.feature_to_device(feature, self.device)

                out, out_lm = self.model(feature)
                print(self.vocab.itos(out[3, 0].argmax(dim=0).item()),
                      self.vocab.itos(out_lm[3, 0].argmax(dim=0).item()))
                loss, loss_lm = models.AR.loss(self.out_loss_fn, out, out_lm,
                                               feature.resp, feature.lm.y)
                print(loss, loss_lm)
                loss = loss + self.model_config.alpha * loss_lm
                total_loss += loss.item()

                # target include w1, w2...[EOS], len: max_seq_length + 1
                target = copy.deepcopy(feature.resp[1:])
                # feature will be changed
                pred, pred_padded = utils.sample_sequence(
                    feature, self.vocab, self.model, self.args)

                pred_tokens = [[self.vocab.itos(k) for k in ks] for ks in pred]
                target_tokens = [[[self.vocab.itos(k) for k in ks]]
                                 for ks in target.T.tolist()]
                print('----------------------------------')
                print(
                    'Context: ', ''.join([
                        self.vocab.itos(k)
                        for k in feature.context.T.tolist()[0]
                    ]))
                print(
                    'LM x: ', ''.join([
                        self.vocab.itos(k) for k in feature.lm.x.T.tolist()[0]
                    ]))
                print(
                    'LM y: ', ''.join([
                        self.vocab.itos(k) for k in feature.lm.y.T.tolist()[0]
                    ]))
                print(
                    'Pred: ', ''.join([
                        self.vocab.itos(k) for k in pred_padded.T.tolist()[0]
                    ]))
                print('Target: ', ''.join(target_tokens[0][0]))
                print(
                    'Pred: ', ''.join([
                        self.vocab.itos(k) for k in pred_padded.T.tolist()[-1]
                    ]))
                print('Target: ', ''.join(target_tokens[-1][0]))
                print('----------------------------------')
                bleu = metrics.bleu_score(pred_tokens, target_tokens)
                f1 = metrics.f1_score(pred_padded.T.to('cpu'),
                                      target.T.to('cpu'))
                # dist1 = metrics.distinct_score([v[:-1] for v in pred])
                dist1 = metrics.distinct_score(pred_tokens)
                dist2 = metrics.distinct_score(pred_tokens, 2)

                total_bleu += bleu
                total_f1 += f1
                total_dist1 += dist1
                total_dist2 += dist2

        l = len(self.test_iter)
        bleu = total_bleu / l
        f1 = total_f1 / l
        dist1 = total_dist1 / l
        dist2 = total_dist2 / l
        # https://stackoverflow.com/questions/59209086/calculate-perplexity-in-pytorch
        # see per-word perplexity:
        # https://github.com/huggingface/transfer-learning-conv-ai/blob/master/convai_evaluation.py#L161
        # https://github.com/facebookresearch/ParlAI/blob/56d46551190a7ffaedccd13534412d43bc7076e5/parlai/scripts/eval_ppl.py
        ppl = math.exp(total_loss / l)

        print(f'\tBleu: {bleu:.8f} | F1: {f1:.8f} | '
              f'Dist1: {dist1:.3f} | Dist2: {dist2:.3f} | PPL: {ppl:7.3f}')
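The PPL above exponentiates the mean per-batch loss; the linked references instead compute per-word perplexity, weighting each batch by its number of target tokens. A minimal sketch of that alternative with made-up numbers (the (loss, token count) pairs are purely illustrative):

import math

# Per-token perplexity: weight each batch's mean cross-entropy by its number
# of target tokens, then exponentiate the corpus-level mean.
batches = [(3.2, 120), (2.9, 95), (3.4, 110)]  # (mean loss, n_tokens) per batch

total_nll = sum(loss * n_tokens for loss, n_tokens in batches)
total_tokens = sum(n_tokens for _, n_tokens in batches)
ppl = math.exp(total_nll / total_tokens)
print(f'per-token PPL: {ppl:7.3f}')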
Example #6
    def inputs_labels_from_batch(self, batch_data):
        utils.feature_to_device(batch_data, 'cuda')
        return (batch_data, (batch_data.resp, batch_data.lm))