Example #1
    def test_get_text_mask_from_representations(self):
        token_representations = torch.FloatTensor(
            [[[0.1, 0.2], [0.3, 0.4], [0.5, 0.6], [0, 0], [0, 0]],
             [[0.1, 0.2], [0.3, 0.4], [0, 0], [0, 0], [0, 0]],
             [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6], [0.1, 0.1], [0.1, 0.1]],
             [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6], [0.2, 0.3], [0, 0]]])
        mask = get_text_mask_from_representations(token_representations)
        assert_allclose(
            mask.cpu().numpy(),
            np.array([[1, 1, 1, 0, 0],
                      [1, 1, 0, 0, 0],
                      [1, 1, 1, 1, 1],
                      [1, 1, 1, 1, 0]]))
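
# A minimal sketch of what get_text_mask_from_representations might look like,
# assuming (as the test above suggests) that padding positions are exactly
# all-zero vectors. Illustrative only; the actual helper in the codebase may
# be implemented differently.
import torch


def get_text_mask_from_representations_sketch(
        token_representations: torch.FloatTensor) -> torch.LongTensor:
    # A position counts as real text if any element of its representation
    # vector is nonzero. Shape: (batch_size, sequence_length)
    return (token_representations.abs().sum(dim=-1) != 0).long()
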
    def forward(
            self,  # type: ignore
            label_indices: torch.LongTensor,
            token_representations: torch.FloatTensor = None,
            raw_tokens: List[List[str]] = None,
            labels: torch.LongTensor = None,
            **kwargs) -> Dict[str, torch.Tensor]:
        """
        If ``token_representations`` is provided, ``raw_tokens`` is not required. If
        ``token_representations`` is ``None``, then ``raw_tokens`` is required.

        Parameters
        ----------
        label_indices : torch.LongTensor
            A LongTensor of shape (batch_size, max_num_adpositions) with the indices
            of the tokens to predict a label for in each element (sentence) of the batch.
        token_representations : torch.FloatTensor, optional (default = None)
            A tensor of shape (batch_size, sequence_length, representation_dim) with
            the representations of the tokens. If None, we use a contextualizer within
            this model to produce the token representations.
        raw_tokens : List[List[str]], optional (default = None)
            A batch of lists with the raw token strings. Used to compute
            token_representations, if it is None.
        labels : torch.LongTensor, optional (default = None)
            A torch tensor representing the sequence of integer gold class labels
            of shape ``(batch_size, num_label_indices)``.

        Returns
        -------
        An output dictionary consisting of:
        logits : torch.FloatTensor
            A tensor of shape ``(batch_size, num_label_indices,
            num_classes)`` representing unnormalized log probabilities
            of the classes.
        class_probabilities : torch.FloatTensor
            A tensor of shape ``(batch_size, num_label_indices,
            num_classes)`` representing a distribution of the tag classes.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimized.
        """
        # Convert to LongTensor
        # TODO: add PR to ArrayField to preserve array types.
        label_indices = label_indices.long()
        if token_representations is None:
            if self._contextualizer is None:
                raise ConfigurationError(
                    "token_representation not provided as input to the model, and no "
                    "contextualizer was specified. Either add a contextualizer to your "
                    "dataset reader (preferred if your contextualizer is frozen) or to "
                    "this model (if you wish to train your contextualizer).")
            if raw_tokens is None:
                raise ValueError(
                    "Input raw_tokens is ``None`` --- make sure to set "
                    "include_raw_tokens in the DatasetReader to True.")
            if label_indices is None:
                raise ValueError("Did not recieve any token indices, needed "
                                 "if the contextualizer is within the model.")
            # Convert contextualizer output into a tensor
            # Shape: (batch_size, max_seq_len, representation_dim)
            token_representations, _ = pad_contextualizer_output(
                self._contextualizer(raw_tokens))

        # Move token representation to the same device as the
        # module (CPU or CUDA). TODO(nfliu): This only works if the module
        # is on one device.
        device = next(self._decoder._linear_layers[0].parameters()).device
        token_representations = token_representations.to(device)
        text_mask = get_text_mask_from_representations(token_representations)
        text_mask = text_mask.to(device)
        label_mask = self._get_label_mask_from_label_indices(label_indices)
        label_mask = label_mask.to(device)

        # Mask out the -1 padding in the label_indices, since that doesn't
        # work with indexing. Note that we can't 0 pad because 0 is actually
        # a valid label index, so we pad with -1 just for the purposes of
        # proper mask calculation and then convert to 0-padding by applying
        # the mask.
        label_indices = label_indices * label_mask

        # Encode the token representation.
        encoded_token_representations = self._encoder(token_representations,
                                                      text_mask)

        batch_size = label_indices.size(0)
        # Index into the encoded_token_representations to get tensors corresponding
        # to the representations of the tokens to predict labels for.
        # Shape: (batch_size, num_label_indices, representation_dim)
        range_vector = get_range_vector(
            batch_size, get_device_of(label_indices)).unsqueeze(1)
        selected_token_representations = encoded_token_representations[
            range_vector, label_indices]
        selected_token_representations = (
            selected_token_representations.contiguous())

        # Decode out a label from the token representation
        # Shape: (batch_size, num_label_indices, num_classes)
        logits = self._decoder(selected_token_representations)
        class_probabilities = F.softmax(logits, dim=-1)
        output_dict = {
            "logits": logits,
            "class_probabilities": class_probabilities
        }
        if labels is not None:
            loss = sequence_cross_entropy_with_logits(
                logits, labels, label_mask, average=self.loss_average)
            for name, metric in self.metrics.items():
                # When not running in error analysis mode, skip
                # metrics that start with "_"
                if not self.error_analysis and name.startswith("_"):
                    continue
                metric(logits, labels, label_mask.float())
            output_dict["loss"] = loss
        return output_dict
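
# A self-contained sketch (illustrative names, not taken from the model above)
# of the batched index selection done with range_vector and label_indices: a
# (batch_size, 1) range vector broadcasts against the
# (batch_size, num_label_indices) index tensor, picking one encoded
# representation per label index for every element of the batch.
import torch

batch_size, seq_len, dim = 2, 5, 3
encoded = torch.arange(batch_size * seq_len * dim, dtype=torch.float).view(
    batch_size, seq_len, dim)
label_indices = torch.tensor([[0, 3], [1, 4]])

range_vector = torch.arange(batch_size).unsqueeze(1)  # Shape: (batch_size, 1)
selected = encoded[range_vector, label_indices]
# Shape: (batch_size, num_label_indices, dim)
assert selected.shape == (batch_size, 2, dim)
assert torch.equal(selected[0, 1], encoded[0, 3])
assert torch.equal(selected[1, 0], encoded[1, 1])
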
Example #3
    def forward(self,  # type: ignore
                token_representations: torch.FloatTensor = None,
                raw_tokens: List[List[str]] = None,
                labels: torch.LongTensor = None,
                **kwargs) -> Dict[str, torch.Tensor]:
        """
        Parameters
        ----------
        token_representations : torch.FloatTensor, optional (default = None)
            A padded tensor of shape (batch_size, seq_len, representation_dim),
            with the representations of the tokens. If None, we use a contextualizer within
            this model to produce the token representations.
        raw_tokens : List[List[str]], optional (default = None)
            A batch of lists with the raw token strings. Used to compute token_representations
            if it is None.
        labels : torch.LongTensor, optional (default = None)
            A torch tensor representing the sequence of integer gold class labels of shape
            ``(batch_size, num_tokens)``.

        Returns
        -------
        An output dictionary consisting of:
        logits : torch.FloatTensor
            A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
            unnormalised log probabilities of the tag classes.
        class_probabilities : torch.FloatTensor
            A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
            a distribution of the tag classes per word.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimised.
        """
        if token_representations is None:
            if self._contextualizer is None:
                raise ConfigurationError(
                    "token_representations not provided as input to the model, and no "
                    "contextualizer was specified. Either add a contextualizer to your "
                    "dataset reader (preferred if your contextualizer is frozen) or to "
                    "this model (if you wish to train your contextualizer).")
            if raw_tokens is None:
                raise ValueError("Input raw_tokens is ``None`` and token representations "
                                 "were not provided!")
            token_representations, mask = pad_contextualizer_output(
                self._contextualizer(raw_tokens))
            # Move token representations to the same device as the
            # module (CPU or CUDA). TODO(nfliu): This only works if the module
            # is on one device.
            device = next(self._decoder._module._linear_layers[0].parameters()).device
            token_representations = token_representations.to(device)
            mask = mask.to(device)
        else:
            mask = get_text_mask_from_representations(token_representations)

        batch_size, sequence_length = mask.size()

        # Encode the token representations.
        encoded_token_representations = self._encoder(token_representations, mask)

        logits = self._decoder(encoded_token_representations)

        output_dict = {}
        # Run CRF if provided and calculate class_probabilities
        if self._crf:
            best_paths = self._crf.viterbi_tags(logits, mask)
            # Just get the tags and ignore the score.
            predicted_tags = [x for x, y in best_paths]
            # Add tags to output dict
            output_dict["tags"] = predicted_tags
            # Get the class probabilities from the viterbi tags
            class_probabilities = logits * 0.
            for i, instance_tags in enumerate(predicted_tags):
                for j, tag_id in enumerate(instance_tags):
                    class_probabilities[i, j, tag_id] = 1
        else:
            reshaped_log_probs = logits.view(-1, self._num_classes)
            class_probabilities = F.softmax(reshaped_log_probs, dim=-1).view(
                [batch_size, sequence_length, self._num_classes])

        output_dict["logits"] = logits
        output_dict["mask"] = mask
        output_dict["class_probabilities"] = class_probabilities

        if labels is not None:
            if self._crf:
                # Add negative log-likelihood as loss
                log_likelihood = self._crf(logits, labels, mask)
                loss = -log_likelihood
            else:
                loss = sequence_cross_entropy_with_logits(logits, labels, mask,
                                                          average=self.loss_average)

            for name, metric in self.metrics.items():
                # When not running in error analysis mode, skip
                # metrics that start with "_"
                if not self.error_analysis and name.startswith("_"):
                    continue
                if name == "perplexity":
                    # Perplexity metric API is a bit different from the others.
                    metric(loss, mask.float().sum())
                else:
                    metric(class_probabilities, labels, mask.float())
            output_dict["loss"] = loss
        return output_dict
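
# Both forward methods above rely on pad_contextualizer_output to turn the
# contextualizer's per-sentence outputs into a padded batch tensor plus a
# mask. The following is a minimal sketch under the assumption that the
# contextualizer returns one (seq_len_i, representation_dim) tensor per
# sentence; the real helper's signature and behavior may differ.
from typing import List, Tuple

import torch


def pad_contextualizer_output_sketch(
        representations: List[torch.FloatTensor]
) -> Tuple[torch.FloatTensor, torch.LongTensor]:
    batch_size = len(representations)
    max_len = max(rep.size(0) for rep in representations)
    dim = representations[0].size(1)
    # Zero-pad each sentence's representations up to the batch max length,
    # and build a (batch_size, max_len) mask with 1 for real tokens.
    padded = representations[0].new_zeros(batch_size, max_len, dim)
    mask = torch.zeros(batch_size, max_len, dtype=torch.long)
    for i, rep in enumerate(representations):
        padded[i, :rep.size(0)] = rep
        mask[i, :rep.size(0)] = 1
    return padded, mask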