Example #1
    def _nli_diagnostic_forward(self, batch, task, predict):
        out = {}

        # embed the sentence
        classifier = self._get_classifier(task)
        if self.use_bert:
            sent, mask = self.sent_encoder(batch["inputs"], task)
            logits = classifier(sent, mask)
        else:
            sent1, mask1 = self.sent_encoder(batch["input1"], task)
            sent2, mask2 = self.sent_encoder(batch["input2"], task)
            logits = classifier(sent1, sent2, mask1, mask2)
        out["logits"] = logits
        out["n_exs"] = get_batch_size(batch)

        if "labels" in batch:
            if batch["labels"].dim() == 0:
                labels = batch["labels"].unsqueeze(0)
            elif batch["labels"].dim() == 1:
                labels = batch["labels"]
            else:
                labels = batch["labels"].squeeze(-1)
            out["loss"] = F.cross_entropy(logits, labels)
            # task.update_diagnostic_metrics(predicted, labels, batch)
            task.update_diagnostic_metrics(logits, labels, batch)

        if predict:
            _, predicted = logits.max(dim=1)
            out["preds"] = predicted

        return out
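The label handling above normalizes `batch["labels"]` to a flat `[batch_size]` vector before calling `F.cross_entropy`. A minimal, self-contained sketch of that pattern (plain PyTorch, outside the model class; the tensors and shapes are made up for illustration):

import torch
import torch.nn.functional as F

def normalize_labels(labels):
    """Bring labels to shape [batch_size] for F.cross_entropy."""
    if labels.dim() == 0:        # scalar label -> batch of one
        return labels.unsqueeze(0)
    if labels.dim() == 1:        # already [batch_size]
        return labels
    return labels.squeeze(-1)    # e.g. [batch_size, 1] -> [batch_size]

logits = torch.randn(4, 3)                   # [batch_size, n_classes]
labels = torch.tensor([[0], [2], [1], [2]])  # [batch_size, 1]
loss = F.cross_entropy(logits, normalize_labels(labels))
print(loss.item())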
Example #2
    def _single_sentence_forward(self, batch, task, predict):
        out = {}

        # embed the sentence
        word_embs_in_context, sent_mask = self.sent_encoder(batch["input1"], task)
        # pass to a task specific classifier
        classifier = self._get_classifier(task)
        logits = classifier(word_embs_in_context, sent_mask)
        out["logits"] = logits
        out["n_exs"] = get_batch_size(batch)

        if "labels" in batch:  # means we should compute loss
            if batch["labels"].dim() == 0:
                labels = batch["labels"].unsqueeze(0)
            elif batch["labels"].dim() == 1:
                labels = batch["labels"]
            else:
                labels = batch["labels"].squeeze(-1)
            out["loss"] = F.cross_entropy(logits, labels)
            tagmask = batch.get("tagmask", None)
            task.update_metrics(logits, labels, tagmask=tagmask)

        if predict:
            if isinstance(task, RegressionTask):
                if logits.ndimension() > 1:
                    assert (
                        logits.ndimension() == 2 and logits.size(-1) == 1
                    ), "Invalid regression prediction dimensions!"
                    logits = logits.squeeze(-1)
                out["preds"] = logits
            else:
                _, out["preds"] = logits.max(dim=1)
        return out
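The prediction branch above distinguishes regression outputs from classification outputs. A standalone sketch, assuming a `[batch_size, 1]` regression head and a `[batch_size, n_classes]` classification head; the `is_regression` flag stands in for the `isinstance(task, RegressionTask)` check:

import torch

def to_predictions(logits, is_regression):
    if is_regression:
        if logits.ndimension() > 1:
            # only a trailing singleton dimension is allowed for regression scores
            assert (
                logits.ndimension() == 2 and logits.size(-1) == 1
            ), "Invalid regression prediction dimensions!"
            logits = logits.squeeze(-1)
        return logits                # raw scores, shape [batch_size]
    return logits.max(dim=1)[1]      # predicted class indices, shape [batch_size]

print(to_predictions(torch.randn(4, 1), is_regression=True).shape)   # torch.Size([4])
print(to_predictions(torch.randn(4, 3), is_regression=False).shape)  # torch.Size([4])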
Example #3
    def _seq_gen_forward(self, batch, task, predict):
        """ For variational autoencoder """
        out = {}
        sent, sent_mask = self.sent_encoder(batch["inputs"], task)
        out["n_exs"] = get_batch_size(batch)

        if "targs" in batch:
            pass

        if predict:
            pass

        return out
Example #4
    def _pair_sentence_forward(self, batch, task, predict):
        out = {}

        # embed the sentence
        classifier = self._get_classifier(task)
        if self.use_bert:
            sent, mask = self.sent_encoder(batch["inputs"], task)
            # special case for WiC b/c we want to add representations of particular tokens
            if isinstance(task, WiCTask):
                logits = classifier(sent, mask, [batch["idx1"], batch["idx2"]])
            else:
                logits = classifier(sent, mask)
        else:
            sent1, mask1 = self.sent_encoder(batch["input1"], task)
            sent2, mask2 = self.sent_encoder(batch["input2"], task)
            if isinstance(task, WiCTask):
                logits = classifier(sent1, sent2, mask1, mask2, [batch["idx1"]], [batch["idx2"]])
            else:
                logits = classifier(sent1, sent2, mask1, mask2)
        out["logits"] = logits
        out["n_exs"] = get_batch_size(batch)
        tagmask = batch.get("tagmask", None)
        if "labels" in batch:
            labels = batch["labels"]
            labels = labels.squeeze(-1) if len(labels.size()) > 1 else labels
            if isinstance(task, RegressionTask):
                logits = logits.squeeze(-1) if len(logits.size()) > 1 else logits
                out["loss"] = F.mse_loss(logits, labels)
                logits_np = logits.data.cpu().numpy()
                labels_np = labels.data.cpu().numpy()
                task.update_metrics(logits_np, labels_np, tagmask=tagmask)
            else:
                out["loss"] = F.cross_entropy(logits, labels)
                task.update_metrics(logits, labels, tagmask=tagmask)

        if predict:
            if isinstance(task, RegressionTask):
                if logits.ndimension() > 1:
                    assert (
                        logits.ndimension() == 2 and logits.size(-1) == 1
                    ), "Invalid regression prediction dimensions!"
                    logits = logits.squeeze(-1)
                out["preds"] = logits
            else:
                _, out["preds"] = logits.max(dim=1)
        return out
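The loss branch above switches between MSE for regression tasks and cross-entropy for classification tasks. A minimal sketch of just that branching, with made-up targets and an `is_regression` flag standing in for the `isinstance(task, RegressionTask)` check:

import torch
import torch.nn.functional as F

def pair_loss(logits, labels, is_regression):
    labels = labels.squeeze(-1) if labels.dim() > 1 else labels
    if is_regression:
        scores = logits.squeeze(-1) if logits.dim() > 1 else logits
        return F.mse_loss(scores, labels.float())
    return F.cross_entropy(logits, labels.long())

# regression head: one score per sentence pair
print(pair_loss(torch.randn(4, 1), torch.rand(4), is_regression=True))
# classification head: e.g. entailment / neutral / contradiction
print(pair_loss(torch.randn(4, 3), torch.tensor([0, 1, 2, 1]), is_regression=False))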
Example #5
    def _tagger_forward(self, batch: dict, task: TaggingTask, predict: bool) -> dict:
        """
        This function is for sequence tagging (one-to-one mapping between words and tags).
        Args:
            batch: a dict of inputs and target tags
            task: TaggingTask
            predict: (boolean) predict mode (not supported)
        Returns:
            out: (dict)
                - 'logits': output layer, dimension: [batch_size * task.max_seq_len, task.num_tags]
                - 'loss': size-averaged cross-entropy loss
        """
        out = {}
        # batch["inputs"] only has one item
        b_size, seq_len = list(batch["inputs"].values())[0].size()
        seq_len -= 2  # the two boundary tokens added by the encoder are stripped below
        sent_encoder = self.sent_encoder
        out["n_exs"] = get_batch_size(batch)
        if not isinstance(sent_encoder, BiLMEncoder):
            sent, mask = sent_encoder(batch["inputs"], task)
            sent = sent.masked_fill(mask == 0, 0)  # zero out masked positions to avoid NaNs
            sent = sent[:, 1:-1, :]  # drop boundary tokens
            hid2tag = self._get_classifier(task)
            logits = hid2tag(sent)
            logits = logits.view(b_size * seq_len, -1)
            out["logits"] = logits
            targs = batch["targs"]["words"][:, :seq_len].contiguous().view(-1)
        if "mask" in batch:
            # prevent backprop for tags generated for tokenization-introduced tokens
            # such as word boundaries
            mask = batch["mask"]
            batch_mask = [mask[i][:seq_len] for i in range(b_size)]
            batch_mask = torch.stack(batch_mask)
            keep_idxs = torch.nonzero(batch_mask.view(-1).data).squeeze()
            logits = logits.index_select(0, keep_idxs)
            targs = targs.index_select(0, keep_idxs)
        pad_idx = self.vocab.get_token_index(self.vocab._padding_token)
        out["loss"] = F.cross_entropy(logits, targs, ignore_index=pad_idx)
        task.scorer1(logits, targs)
        return out
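The `if "mask" in batch` block above drops positions introduced by tokenization before computing the loss. A self-contained sketch of that masking step, with made-up sizes, labels, and padding index:

import torch
import torch.nn.functional as F

b_size, seq_len, num_tags, pad_idx = 2, 5, 4, 0

logits = torch.randn(b_size * seq_len, num_tags)
targs = torch.randint(1, num_tags, (b_size * seq_len,))

# 1 = real word position, 0 = tokenization-introduced position (e.g. a word-piece continuation)
batch_mask = torch.tensor([[1, 1, 0, 1, 0],
                           [1, 0, 1, 1, 1]])

keep_idxs = torch.nonzero(batch_mask.view(-1)).squeeze(-1)
logits = logits.index_select(0, keep_idxs)
targs = targs.index_select(0, keep_idxs)

loss = F.cross_entropy(logits, targs, ignore_index=pad_idx)
print(logits.shape, loss.item())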
Example #6
    def forward(
        self,
        batch: Dict,
        sent_embs: torch.Tensor,
        sent_mask: torch.Tensor,
        task: Task,
        predict: bool,
        cuda_devices: Any,
    ) -> Dict:
        """
        Run forward pass.
        Expects batch to have the following entries:
            'input' : [batch_size, max_len, emb_size]
            'labels' : [batch_size, num_targets] of label indices
            'span1s' : [batch_size, 1, 2], span indices
            'span2s' : [batch_size, 1, 2], span indices
                .
                .
                .
            'span_ts': [batch_size, 1, 2], span indices

        Parameters
        -------------------------------
            batch: dict(str -> Tensor) with entries described above.
            sent_embs: [batch_size, max_len, repr_dim] Tensor
            sent_mask: [batch_size, max_len, 1] Tensor of {0,1}
            task: Task
            predict: whether or not to generate predictions
        This learns different span pooling operators for each span.

        Returns
        -------------------------------
            out: dict(str -> Tensor)
        """
        out = {}

        # Apply projection CNN layer for each span of the input sentence
        sent_embs_t = sent_embs.transpose(1, 2)  # needed for CNN layer
        se_projs = []
        for i in range(self.num_spans):
            se_proj = self.projs[i](sent_embs_t).transpose(2, 1).contiguous()
            se_projs.append(se_proj)

        model_device = self.projs[0].weight.device
        # start from an empty tensor on the model's device and concatenate span embeddings onto it
        span_embs = torch.Tensor([]).to(model_device)
        out["n_exs"] = get_batch_size(batch, cuda_devices)
        _kw = dict(sequence_mask=sent_mask.long())
        for i in range(self.num_spans):
            # spans are [batch_size, num_targets, span_modules]
            span_emb = self.span_extractors[i](
                se_projs[i], batch["span" + str(i + 1) + "s"], **_kw)
            span_embs = torch.cat([span_embs, span_emb], dim=2)

        # [batch_size, num_targets, n_classes]
        logits = self.classifier(span_embs)
        out["logits"] = logits

        # Compute loss if requested.
        if "labels" in batch:
            logits = logits.squeeze(dim=1)
            out["logits"] = logits
            out["loss"] = format_output(
                self.compute_loss(logits, batch["labels"], task), cuda_devices)

        if predict:
            # Return preds as a list.
            preds = self.get_predictions(logits)
            out["preds"] = preds
        return out
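The span extractors above are learned pooling modules configured elsewhere in the model. As a rough illustration of the same data flow (per-span projection, span pooling, concatenation, classification), here is a self-contained sketch that substitutes simple mean pooling over span tokens; the dimensions, two-span setup, and mean pooling are assumptions for illustration only, not the module's actual extractors:

import torch
import torch.nn as nn

batch_size, max_len, repr_dim, proj_dim, n_classes = 2, 7, 16, 8, 3

sent_embs = torch.randn(batch_size, max_len, repr_dim)
spans = {  # [batch_size, 1, 2] inclusive (start, end) token indices, as in the batch format above
    "span1s": torch.tensor([[[0, 2]], [[1, 3]]]),
    "span2s": torch.tensor([[[3, 5]], [[4, 6]]]),
}

projs = nn.ModuleList([nn.Conv1d(repr_dim, proj_dim, kernel_size=1) for _ in range(2)])
classifier = nn.Linear(2 * proj_dim, n_classes)

def mean_pool_span(seq, span):
    # seq: [batch_size, max_len, dim]; span: [batch_size, 1, 2] -> pooled: [batch_size, 1, dim]
    pooled = []
    for b in range(seq.size(0)):
        start, end = span[b, 0, 0].item(), span[b, 0, 1].item()
        pooled.append(seq[b, start : end + 1].mean(dim=0, keepdim=True))
    return torch.stack(pooled)

sent_embs_t = sent_embs.transpose(1, 2)  # Conv1d expects [batch, channels, length]
span_embs = torch.cat(
    [
        mean_pool_span(projs[i](sent_embs_t).transpose(2, 1).contiguous(), spans["span%ds" % (i + 1)])
        for i in range(2)
    ],
    dim=2,
)  # [batch_size, 1, 2 * proj_dim]

logits = classifier(span_embs).squeeze(dim=1)  # [batch_size, n_classes]
print(logits.shape)  # torch.Size([2, 3])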