Example #1
    def _generative_step(self, batch: dict) -> dict:
        pad_token_id = self.tokenizer.pad_token_id
        source_ids, source_mask, y = SummarizationDataset.trim_seq2seq_batch(
            batch, pad_token_id)
        t0 = time.time()
        generated_ids = self.model.generate(
            input_ids=source_ids,
            attention_mask=source_mask,
            use_cache=True,
            decoder_start_token_id=self.decoder_start_token_id,
        )
        gen_time = (time.time() - t0) / source_ids.shape[0]
        preds = self.ids_to_clean_text(generated_ids)
        target = self.ids_to_clean_text(y)
        loss_tensors = self._step(batch)
        base_metrics = {
            name: loss
            for name, loss in zip(self.loss_names, loss_tensors)
        }
        rouge: Dict = self.calc_generative_metrics(preds, target)
        summ_len = np.mean(lmap(len, generated_ids))
        base_metrics.update(gen_time=gen_time,
                            summ_len=summ_len,
                            preds=preds,
                            target=target,
                            **rouge)
        return base_metrics
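In the fine-tuning scripts these examples come from, `_generative_step` is usually called from the Lightning validation hooks. A minimal sketch of that wiring, assuming the older PyTorch Lightning epoch-end API and that `self.loss_names` contains a "loss" entry (both assumptions not shown in the snippet above; `torch` is assumed imported, as elsewhere in these snippets):

    def validation_step(self, batch: dict, batch_idx: int) -> dict:
        # Delegate to the shared generative step so validation and test reuse one code path.
        return self._generative_step(batch)

    def validation_epoch_end(self, outputs: list) -> dict:
        # Average each named loss across the validation batches.
        losses = {name: torch.stack([x[name] for x in outputs]).mean()
                  for name in self.loss_names}
        return {"val_loss": losses["loss"], "log": losses}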
Example #2
    def test_step(self, batch, batch_idx):
        pad_token_id = self.tokenizer.pad_token_id
        source_ids, source_mask, y = SummarizationDataset.trim_seq2seq_batch(
            batch, pad_token_id)
        # NOTE: these generation kwargs are faster but produce lower-quality summaries than the settings in evaluate_cnn.py
        generated_ids = self.model.generate(
            input_ids=source_ids,
            attention_mask=source_mask,
            num_beams=1,
            max_length=80,
            repetition_penalty=2.5,
            length_penalty=1.0,
            early_stopping=True,
            use_cache=True,
        )
        preds = [
            self.tokenizer.decode(g,
                                  skip_special_tokens=True,
                                  clean_up_tokenization_spaces=True)
            for g in generated_ids
        ]
        target = [
            self.tokenizer.decode(t,
                                  skip_special_tokens=True,
                                  clean_up_tokenization_spaces=True) for t in y
        ]
        loss = self._step(batch)

        return {"val_loss": loss, "preds": preds, "target": target}
Example #3
    def _generative_entailment_step(self, batch: dict) -> Tensor:
        """
        Decodes the model output and computes the entailment loss of the
        predictions against the references for the current step.

        :param batch: a seq2seq batch dict of input ids, attention mask, and target ids.
        :return: the entailment classification loss as a scalar Tensor.
        """
        pad_token_id = self.tokenizer.pad_token_id
        source_ids, source_mask, y = SummarizationDataset.trim_seq2seq_batch(
            batch, pad_token_id)
        t0 = time.time()
        generated_ids = self.model.generate(
            input_ids=source_ids,
            attention_mask=source_mask,
            use_cache=True,
            decoder_start_token_id=self.decoder_start_token_id,
        )
        gen_time = (time.time() - t0) / source_ids.shape[0]
        preds = self.ids_to_clean_text(generated_ids)
        target = self.ids_to_clean_text(y)

        entailment_input = [
            InputExample(text_a=target[idx],
                         text_b=pred,
                         guid="",
                         label="entailment") for idx, pred in enumerate(preds)
        ]
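        # Convert each (reference, prediction) pair into MNLI-style classification features.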
        entailment_features = glue_convert_examples_to_features(
            entailment_input,
            tokenizer=self.entailment_tokenizer,
            label_list=['contradiction', 'neutral', 'entailment'],
            output_mode="classification")
        all_input_ids = torch.tensor(
            [f.input_ids for f in entailment_features], dtype=torch.long)
        all_attention_mask = torch.tensor(
            [f.attention_mask for f in entailment_features], dtype=torch.long)
        all_labels = torch.tensor([f.label for f in entailment_features],
                                  dtype=torch.long)

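        # Move the feature tensors to the GPU (the device is hardcoded to 'cuda' here).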
        all_input_ids = all_input_ids.to('cuda')
        all_attention_mask = all_attention_mask.to('cuda')
        all_labels = all_labels.to('cuda')

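        # Score the pairs with the entailment classifier without tracking gradients.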
        with torch.no_grad():
            entailment_output = self.entailment_model(
                input_ids=all_input_ids,
                attention_mask=all_attention_mask,
                labels=all_labels)
            entailment_loss = entailment_output[0].detach()

        return entailment_loss
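Because the returned loss is detached inside `torch.no_grad()`, it cannot drive gradients directly; a natural use is logging it as an extra validation metric. A minimal sketch, assuming the surrounding Lightning module (the metric key is a hypothetical name, and note this decodes the batch a second time):

    def validation_step(self, batch: dict, batch_idx: int) -> dict:
        # Reuse the test-time decoding path and attach the entailment score for logging.
        metrics = self.test_step(batch, batch_idx)
        metrics["entailment_loss"] = self._generative_entailment_step(batch)
        return metrics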
Example #4
    def _generative_step(self, batch):
        pad_token_id = self.tokenizer.pad_token_id
        source_ids, source_mask, y = SummarizationDataset.trim_seq2seq_batch(batch, pad_token_id)
        # TODO(SS): task specific params

        t0 = time.time()
        generated_ids = self.model.generate(input_ids=source_ids, attention_mask=source_mask, use_cache=True)
        gen_time = time.time() - t0
        preds = self.ids_to_clean_text(generated_ids)
        target = self.ids_to_clean_text(y)
        loss_tensors = self._step(batch)
        base_metrics = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
        rouge: Dict = calculate_rouge(preds, target)
        summ_len = np.mean(lmap(len, generated_ids))
        base_metrics.update(gen_time=gen_time, summ_len=summ_len, preds=preds, target=target, **rouge)
        return base_metrics
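`calculate_rouge` is imported from the example's utility module; a minimal self-contained sketch of an equivalent helper, assuming the `rouge_score` package (the function name and defaults here are illustrative, not the library's exact implementation):

    from rouge_score import rouge_scorer, scoring

    def calculate_rouge_sketch(preds, targets, rouge_keys=("rouge1", "rouge2", "rougeL")):
        # Score each (reference, prediction) pair, then bootstrap-aggregate the results.
        scorer = rouge_scorer.RougeScorer(list(rouge_keys), use_stemmer=True)
        aggregator = scoring.BootstrapAggregator()
        for pred, tgt in zip(preds, targets):
            aggregator.add_scores(scorer.score(tgt, pred))
        result = aggregator.aggregate()
        # Report the mid (median) F-measure of each ROUGE variant, scaled to 0-100.
        return {k: round(v.mid.fmeasure * 100, 4) for k, v in result.items()}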