def _nli_diagnostic_forward(self, batch, task, predict):
    out = {}

    # embed the sentence
    classifier = self._get_classifier(task)
    if self.use_bert:
        sent, mask = self.sent_encoder(batch["inputs"], task)
        logits = classifier(sent, mask)
    else:
        sent1, mask1 = self.sent_encoder(batch["input1"], task)
        sent2, mask2 = self.sent_encoder(batch["input2"], task)
        logits = classifier(sent1, sent2, mask1, mask2)
    out["logits"] = logits
    out["n_exs"] = get_batch_size(batch)

    if "labels" in batch:
        if batch["labels"].dim() == 0:
            labels = batch["labels"].unsqueeze(0)
        elif batch["labels"].dim() == 1:
            labels = batch["labels"]
        else:
            labels = batch["labels"].squeeze(-1)
        out["loss"] = F.cross_entropy(logits, labels)
        task.update_diagnostic_metrics(logits, labels, batch)

    if predict:
        _, predicted = logits.max(dim=1)
        out["preds"] = predicted

    return out
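# Illustrative sketch (not part of the model): the label handling above
# normalizes whatever shape the batch provides into the 1-D [batch] vector
# that F.cross_entropy expects alongside [batch, n_classes] logits.
#
#     import torch
#     import torch.nn.functional as F
#
#     logits = torch.randn(4, 3)                   # [batch, n_classes]
#     labels = torch.tensor([[0], [2], [1], [0]])  # [batch, 1]
#     loss = F.cross_entropy(logits, labels.squeeze(-1))  # labels -> [batch]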
def _single_sentence_forward(self, batch, task, predict):
    out = {}

    # embed the sentence
    word_embs_in_context, sent_mask = self.sent_encoder(batch["input1"], task)

    # pass to a task specific classifier
    classifier = self._get_classifier(task)
    logits = classifier(word_embs_in_context, sent_mask)
    out["logits"] = logits
    out["n_exs"] = get_batch_size(batch)

    if "labels" in batch:  # means we should compute loss
        if batch["labels"].dim() == 0:
            labels = batch["labels"].unsqueeze(0)
        elif batch["labels"].dim() == 1:
            labels = batch["labels"]
        else:
            labels = batch["labels"].squeeze(-1)
        out["loss"] = F.cross_entropy(logits, labels)
        tagmask = batch.get("tagmask", None)
        task.update_metrics(logits, labels, tagmask=tagmask)

    if predict:
        if isinstance(task, RegressionTask):
            if logits.ndimension() > 1:
                # check the trailing dimension, not the tensor's last row
                assert (
                    logits.ndimension() == 2 and logits.size(-1) == 1
                ), "Invalid regression prediction dimensions!"
                logits = logits.squeeze(-1)
            out["preds"] = logits
        else:
            _, out["preds"] = logits.max(dim=1)

    return out
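# Illustrative sketch (not part of the model): in the predict branch,
# classification preds are argmaxes over the class dimension, while
# regression preds are the raw model outputs squeezed to a [batch] vector.
#
#     import torch
#
#     logits = torch.tensor([[0.1, 2.0, -1.0], [1.5, 0.2, 0.3]])
#     _, preds = logits.max(dim=1)     # tensor([1, 0]) -- class indices
#
#     reg_out = torch.randn(2, 1)      # [batch, 1] regression output
#     reg_preds = reg_out.squeeze(-1)  # [batch] predictions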
def _seq_gen_forward(self, batch, task, predict):
    """ For variational autoencoder """
    out = {}
    sent, sent_mask = self.sent_encoder(batch["inputs"], task)
    out["n_exs"] = get_batch_size(batch)

    if "targs" in batch:
        pass

    if predict:
        pass

    return out
def _pair_sentence_forward(self, batch, task, predict):
    out = {}

    # embed the sentence
    classifier = self._get_classifier(task)
    if self.use_bert:
        sent, mask = self.sent_encoder(batch["inputs"], task)
        # special case for WiC b/c we want to add representations of particular tokens
        if isinstance(task, WiCTask):
            logits = classifier(sent, mask, [batch["idx1"], batch["idx2"]])
        else:
            logits = classifier(sent, mask)
    else:
        sent1, mask1 = self.sent_encoder(batch["input1"], task)
        sent2, mask2 = self.sent_encoder(batch["input2"], task)
        if isinstance(task, WiCTask):
            logits = classifier(sent1, sent2, mask1, mask2, [batch["idx1"]], [batch["idx2"]])
        else:
            logits = classifier(sent1, sent2, mask1, mask2)
    out["logits"] = logits
    out["n_exs"] = get_batch_size(batch)
    tagmask = batch.get("tagmask", None)

    if "labels" in batch:
        labels = batch["labels"]
        labels = labels.squeeze(-1) if len(labels.size()) > 1 else labels
        if isinstance(task, RegressionTask):
            logits = logits.squeeze(-1) if len(logits.size()) > 1 else logits
            out["loss"] = F.mse_loss(logits, labels)
            logits_np = logits.data.cpu().numpy()
            labels_np = labels.data.cpu().numpy()
            task.update_metrics(logits_np, labels_np, tagmask=tagmask)
        else:
            out["loss"] = F.cross_entropy(logits, labels)
            task.update_metrics(logits, labels, tagmask=tagmask)

    if predict:
        if isinstance(task, RegressionTask):
            if logits.ndimension() > 1:
                # check the trailing dimension, not the tensor's last row
                assert (
                    logits.ndimension() == 2 and logits.size(-1) == 1
                ), "Invalid regression prediction dimensions!"
                logits = logits.squeeze(-1)
            out["preds"] = logits
        else:
            _, out["preds"] = logits.max(dim=1)

    return out
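# Illustrative sketch (not part of the model): pair tasks branch on task
# type -- regression tasks (e.g. a similarity task) use MSE on a squeezed
# scalar output, while classification tasks use cross-entropy over logits.
#
#     import torch
#     import torch.nn.functional as F
#
#     reg_logits = torch.randn(4, 1).squeeze(-1)  # [batch]
#     reg_labels = torch.rand(4)                  # gold scores
#     mse = F.mse_loss(reg_logits, reg_labels)
#
#     cls_logits = torch.randn(4, 3)              # [batch, n_classes]
#     cls_labels = torch.tensor([0, 2, 1, 0])
#     ce = F.cross_entropy(cls_logits, cls_labels)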
def _tagger_forward(self, batch: dict, task: TaggingTask, predict: bool) -> dict:
    """
    This function is for sequence tagging (one-to-one mapping between words and tags).

    Args:
        batch: a dict of inputs and target tags
        task: TaggingTask
        predict: (boolean) predict mode (not supported)

    Returns:
        out: (dict)
            - 'logits': output layer, dimension: [batch_size * task.max_seq_len, task.num_tags]
            - 'loss': size-averaged cross-entropy loss
    """
    out = {}

    # batch["inputs"] only has one item
    b_size, seq_len = list(batch["inputs"].values())[0].size()
    seq_len -= 2  # strip the start/end tokens added by the encoder
    sent_encoder = self.sent_encoder
    out["n_exs"] = get_batch_size(batch)

    if not isinstance(sent_encoder, BiLMEncoder):
        sent, mask = sent_encoder(batch["inputs"], task)
        # zero out padded positions to avoid NaNs
        sent = sent.masked_fill(~mask.bool(), 0)
        sent = sent[:, 1:-1, :]  # drop the start/end token representations
        hid2tag = self._get_classifier(task)
        logits = hid2tag(sent)
        logits = logits.view(b_size * seq_len, -1)
        out["logits"] = logits
        targs = batch["targs"]["words"][:, :seq_len].contiguous().view(-1)

    if "mask" in batch:
        # prevent backprop for tags generated for tokenization-introduced tokens
        # such as word boundaries
        mask = batch["mask"]
        batch_mask = [mask[i][:seq_len] for i in range(b_size)]
        batch_mask = torch.stack(batch_mask)
        keep_idxs = torch.nonzero(batch_mask.view(-1).data).squeeze()
        logits = logits.index_select(0, keep_idxs)
        targs = targs.index_select(0, keep_idxs)

    pad_idx = self.vocab.get_token_index(self.vocab._padding_token)
    out["loss"] = F.cross_entropy(logits, targs, ignore_index=pad_idx)
    task.scorer1(logits, targs)
    return out
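# Illustrative sketch (not part of the model): the "mask" branch above keeps
# only positions corresponding to real words, dropping tag predictions made
# for tokenization-introduced tokens before the loss is computed.
#
#     import torch
#
#     logits = torch.randn(6, 5)                     # [b_size * seq_len, num_tags]
#     batch_mask = torch.tensor([1, 1, 0, 1, 0, 1])  # 0 = tokenization artifact
#     keep_idxs = torch.nonzero(batch_mask).squeeze()
#     kept = logits.index_select(0, keep_idxs)       # [4, num_tags]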
def forward(
    self,
    batch: Dict,
    sent_embs: torch.Tensor,
    sent_mask: torch.Tensor,
    task: Task,
    predict: bool,
    cuda_devices: Any,
) -> Dict:
    """
    Run forward pass.

    Expects batch to have the following entries:
        'input'  : [batch_size, max_len, emb_size]
        'labels' : [batch_size, num_targets] of label indices
        'span1s' : [batch_size, 1, 2], span indices
        'span2s' : [batch_size, 1, 2], span indices
        ...
        'span_ts': [batch_size, 1, 2], span indices

    This learns a different span pooling operator for each span.

    Parameters
    -------------------------------
        batch: dict(str -> Tensor) with entries described above.
        sent_embs: [batch_size, max_len, repr_dim] Tensor
        sent_mask: [batch_size, max_len, 1] Tensor of {0,1}
        task: Task
        predict: whether or not to generate predictions

    Returns
    -------------------------------
        out: dict(str -> Tensor)
    """
    out = {}

    # Apply a projection CNN layer to the input sentence for each span
    sent_embs_t = sent_embs.transpose(1, 2)  # needed for CNN layer
    se_projs = []
    for i in range(self.num_spans):
        se_proj = self.projs[i](sent_embs_t).transpose(2, 1).contiguous()
        se_projs.append(se_proj)

    out["n_exs"] = get_batch_size(batch, cuda_devices)
    _kw = dict(sequence_mask=sent_mask.long())
    span_emb_list = []
    for i in range(self.num_spans):
        # each span_emb is [batch_size, num_targets, span_modules]
        span_emb = self.span_extractors[i](se_projs[i], batch["span" + str(i + 1) + "s"], **_kw)
        span_emb_list.append(span_emb)
    # concatenate once along the feature dimension; this avoids the fragile
    # pattern of repeatedly cat-ing onto an empty tensor
    span_embs = torch.cat(span_emb_list, dim=2)

    # [batch_size, num_targets, n_classes]
    logits = self.classifier(span_embs)
    out["logits"] = logits

    # Compute loss if requested.
    if "labels" in batch:
        logits = logits.squeeze(dim=1)
        out["logits"] = logits
        out["loss"] = format_output(
            self.compute_loss(logits, batch["labels"], task), cuda_devices
        )

    if predict:
        # Return preds as a list.
        preds = self.get_predictions(logits)
        out["preds"] = preds

    return out
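# Illustrative sketch (not part of the model): each span extractor pools
# token representations over its [start, end] indices, and the pooled span
# embeddings are concatenated along the feature dimension before the
# classifier. A minimal mean-pooling analogue, assuming inclusive spans:
#
#     import torch
#
#     sent = torch.randn(2, 10, 8)                # [batch, max_len, repr_dim]
#     spans = torch.tensor([[[1, 3]], [[0, 2]]])  # [batch, num_targets, 2]
#     pooled = torch.stack(
#         [sent[b, s:e + 1].mean(dim=0)
#          for b, (s, e) in enumerate(spans[:, 0].tolist())]
#     ).unsqueeze(1)                              # [batch, 1, repr_dim]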