Python Instance.get примеры использования

Язык программирования: Python

Пространство имен/Пакет: allennlp.data

Класс/Тип: Instance

Метод/Функция: get

Примеров на hotexamples.com: 5

Python Instance.get — найдено 5 примеров. Это лучшие примеры кода на Python для allennlp.data.Instance.get, полученные из open-source-проектов. Вы можете поставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Instance(30)

index_fields(16)

duplicate(12)

as_array_dict(7)

add_field(5)

get(5)

index(3)

indexed(3)

items(3)

as_tensor_dict(1)

human_readable_dict(1)

Пример #1

Показать файл

Файл: doc_classification.py Проект: recognai/biome-text

    def _compute_attributions(
        self,
        single_forward_output: Dict[str, numpy.ndarray],
        instance: Instance,
        n_steps: int = 50,
    ) -> List[List[Attribution]]:
        """Attribute the prediction to the input tokens.

        The attributions are computed with the
        [Integrated Gradients](https://arxiv.org/abs/1703.01365) method.

        Parameters
        ----------
        single_forward_output
            Non-batched forward output containing numpy arrays. The entries
            "embeddings", "mask" and "logits" are read.
        instance
            The instance containing the input data
        n_steps
            The number of steps used when calculating the attribution of each token.

        Returns
        -------
        attributions
            A list of lists of attributions: one inner list per text field of the ListField
        """

        def as_batched_tensor(key: str) -> torch.Tensor:
            # captum needs `torch.Tensor`s and we need a batch dimension (-> unsqueeze)
            return torch.from_numpy(single_forward_output[key]).unsqueeze(0)

        embeddings = as_batched_tensor("embeddings")
        mask = as_batched_tensor("mask")
        logits = as_batched_tensor("logits")

        integrated_gradients = IntegratedGradients(self._encoder_and_head_forward)
        # The attribution target is the class with the highest logit.
        predicted_target = torch.argmax(logits)
        # The convergence delta is requested but not used afterwards.
        raw_attributions, _ = integrated_gradients.attribute(
            embeddings,
            n_steps=n_steps,
            target=predicted_target,
            additional_forward_args=mask,
            return_convergence_delta=True,
        )

        # Sum over dim 3 (presumably the embedding dimension -- TODO confirm),
        # drop the batch dimension and scale by the norm of the whole tensor.
        summed = raw_attributions.sum(dim=3).squeeze(0)
        scores = (summed / torch.norm(summed)).detach().numpy()

        field_name = self.forward_arg_name
        list_field = cast(ListField, instance.get(field_name))
        tokens_per_field = [
            cast(TextField, text_field).tokens for text_field in list_field
        ]

        result = []
        for field_tokens, field_scores in zip(tokens_per_field, scores):
            result.append([
                Attribution(
                    text=token.text,
                    start=token.idx,
                    end=self._get_token_end(token),
                    field=field_name,
                    attribution=score,
                ) for token, score in zip(field_tokens, field_scores)
            ])
        return result

Пример #2

Показать файл

Файл: doc_classification.py Проект: radovankavicky/biome-text

    def explain_prediction(
        self, prediction: Dict[str, numpy.array], instance: Instance, n_steps: int
    ) -> Dict[str, Any]:
        """Add per-token Integrated Gradients attributions to a prediction.

        The input field is handled as a ListField of TextFields, which is why
        the mask and the embeddings are computed with one wrapping dimension.

        Parameters
        ----------
        prediction
            The prediction dictionary; the value stored under `self.label_name`
            is looked up in the vocabulary and used as attribution target
        instance
            The instance the prediction belongs to
        n_steps
            The number of steps passed on to the Integrated Gradients computation

        Returns
        -------
        prediction_dict
            A new dictionary with all entries of `prediction` plus an "explain" key
            that holds, per text field, a list of token/attribution dictionaries
        """
        tensor_dict = Batch([instance]).as_tensor_dict()
        integrated_gradients = IntegratedGradients(self._explain_embeddings)

        # ListField -> the text tensors carry one extra (wrapping) dimension
        wrapping_dims = 1

        list_field = cast(ListField, instance.get(self.forward_arg_name))
        tokens_per_field = []
        for text_field in list_field:
            tokens_per_field.append(
                [token.text for token in cast(TextField, text_field).tokens]
            )

        field_tensors = tensor_dict.get(self.forward_arg_name)
        mask = get_text_field_mask(field_tensors, num_wrapping_dims=wrapping_dims)
        embedded_text = self.backbone.embedder.forward(
            field_tensors, num_wrapping_dims=wrapping_dims
        )

        vocab = self.backbone.vocab
        predicted_label = prediction.get(self.label_name)
        target_index = vocabulary.index_for_label(vocab, predicted_label)

        # The convergence delta is requested but not used afterwards.
        raw_attributions, _ = integrated_gradients.attribute(
            embedded_text,
            target=target_index,
            additional_forward_args=mask,
            return_convergence_delta=True,
            n_steps=n_steps,
        )

        # Sum over dim 3 (presumably the embedding dimension -- TODO confirm),
        # drop the batch dimension and scale by the norm of the whole tensor.
        summed = raw_attributions.sum(dim=3).squeeze(0)
        scores = (summed / torch.norm(summed)).detach().numpy()

        explained_fields = []
        for field_tokens, field_scores in zip(tokens_per_field, scores):
            explained_fields.append(
                [
                    {"token": token, "attribution": score}
                    for token, score in zip(field_tokens, field_scores)
                ]
            )

        return {
            **prediction,
            "explain": {self.forward_arg_name: explained_fields},
        }

Пример #3

Показать файл

    def explain_prediction(
        self, prediction: Dict[str, numpy.array], instance: Instance, n_steps: int
    ) -> Dict[str, Any]:
        """Add per-token Integrated Gradients attributions to a prediction.

        The input field is handled as a single TextField, so the mask and the
        embeddings are computed without wrapping dimensions.

        Parameters
        ----------
        prediction
            The prediction dictionary; the first entry of `prediction["labels"]`
            is looked up in the vocabulary and used as attribution target
        instance
            The instance the prediction belongs to
        n_steps
            The number of steps passed on to the Integrated Gradients computation

        Returns
        -------
        prediction_dict
            A new dictionary with all entries of `prediction` plus an "explain" key
            that holds a list of token/attribution dictionaries
        """
        tensor_dict = Batch([instance]).as_tensor_dict()
        integrated_gradients = IntegratedGradients(self._explain_embeddings)

        # Plain TextField -> no extra (wrapping) dimensions
        wrapping_dims = 0

        text_field = cast(TextField, instance.get(self.forward_arg_name))
        token_texts = [token.text for token in text_field.tokens]

        field_tensor = tensor_dict.get(self.forward_arg_name)
        mask = get_text_field_mask(field_tensor, num_wrapping_dims=wrapping_dims)
        embedded_text = self.backbone.embedder.forward(
            field_tensor, num_wrapping_dims=wrapping_dims
        )

        vocab = self.backbone.vocab
        top_label = prediction["labels"][0]
        target_index = vocabulary.index_for_label(vocab, top_label)

        # The convergence delta is requested but not used afterwards.
        raw_attributions, _ = integrated_gradients.attribute(
            embedded_text,
            n_steps=n_steps,
            target=target_index,
            additional_forward_args=mask,
            return_convergence_delta=True,
        )

        # Sum over dim 2 (presumably the embedding dimension -- TODO confirm),
        # drop the batch dimension and scale by the norm of the whole tensor.
        summed = raw_attributions.sum(dim=2).squeeze(0)
        scores = (summed / torch.norm(summed)).detach().numpy()

        explained_tokens = []
        for token, score in zip(token_texts, scores):
            explained_tokens.append({"token": token, "attribution": score})

        return {
            **prediction,
            "explain": {self.forward_arg_name: explained_tokens},
        }

Пример #4

Показать файл

    def explain_prediction(self, prediction: Dict[str, np.array],
                           instance: Instance, n_steps: int) -> Dict[str, Any]:
        """Compute an attribution for every data field of both records via integrated gradients.

        IMPORTANT: The calculated attributions only make sense for a duplicate/not_duplicate binary classification task
        of the two records.

        Parameters
        ----------
        prediction
            The prediction dictionary; the argmax of its "probs" entry is used as attribution target
        instance
            The instance holding the two records
        n_steps
            The number of steps passed on to the integrated gradients computation

        Returns
        -------
        prediction_dict
            The prediction dictionary with a newly added "explain" key

        Raises
        ------
        RuntimeError
            If the field encodings of the two records differ in size
        """
        tensor_dict = Batch([instance]).as_tensor_dict()

        # 1. Get field encodings
        # TODO(dcfidalgo): optimize: for the prediction we already embedded and field encoded the records.
        #     Also, the forward passes here are always done on cpu!
        encoded_1, mask_1 = self._field_encoding(
            tensor_dict.get(self._RECORD1_ARG_NAME_IN_FORWARD))
        encoded_2, mask_2 = self._field_encoding(
            tensor_dict.get(self._RECORD2_ARG_NAME_IN_FORWARD))
        if encoded_1.size() != encoded_2.size():
            raise RuntimeError(
                "Both records must have the same number of data fields!")

        # 2. Get attributes
        integrated_gradients = IntegratedGradients(self._bimpm_forward)
        predicted_class = int(np.argmax(prediction["probs"]))

        # Everything except the baselines is identical for both calls.
        shared_arguments = dict(
            inputs=(encoded_1, encoded_2),
            additional_forward_args=(mask_1, mask_2),
            target=predicted_class,
            return_convergence_delta=True,
            n_steps=n_steps,
        )
        # For record 1 both baselines are the encoding of record 2 ...
        ig_result_1 = integrated_gradients.attribute(
            baselines=(encoded_2, encoded_2), **shared_arguments)
        # ... and for record 2 both baselines are the encoding of record 1.
        ig_result_2 = integrated_gradients.attribute(
            baselines=(encoded_1, encoded_1), **shared_arguments)

        # NOTE: An earlier attempt to make the attributions for the "duplicate" case more meaningful
        # (integrating each record from the null vector with the target fixed to 0, and raising a
        # RuntimeError for non-binary tasks) did not work and is therefore not implemented here.

        # The returned deltas are not used.
        scores_1, _ = self._get_attributions_and_delta(ig_result_1, 0)
        scores_2, _ = self._get_attributions_and_delta(ig_result_2, 1)

        # 3. Get tokens corresponding to the attributions
        def joined_field_texts(arg_name):
            # One whitespace-joined string per text field of the record.
            return [
                " ".join(token.text for token in textfield.tokens)
                for textfield in instance.get(arg_name)
            ]

        def explained(texts, scores):
            return [{
                "token": text,
                "attribution": score
            } for text, score in zip(texts, scores)]

        texts_1 = joined_field_texts(self._RECORD1_ARG_NAME_IN_FORWARD)
        texts_2 = joined_field_texts(self._RECORD2_ARG_NAME_IN_FORWARD)

        return {
            **prediction,
            "explain": {
                self._RECORD1_ARG_NAME_IN_FORWARD: explained(texts_1, scores_1),
                self._RECORD2_ARG_NAME_IN_FORWARD: explained(texts_2, scores_2),
            },
        }

Пример #5

Показать файл

Файл: record_pair_classification.py Проект: recognai/biome-text

    def _compute_attributions(
        self,
        single_forward_output: Dict[str, numpy.ndarray],
        instance: Instance,
        n_steps: int = 50,
    ) -> List[Attribution]:
        """Compute an attribution for every data field of both records by means of the
        [Integrated Gradients](https://arxiv.org/abs/1703.01365) method.

        IMPORTANT: The calculated attributions only make sense for a duplicate/not_duplicate binary classification task
        of the two records.

        Parameters
        ----------
        single_forward_output
            Non-batched forward output containing numpy arrays. The entries
            "field_encoded_record1", "record_mask_record1", "field_encoded_record2",
            "record_mask_record2" and "logits" are read.
        instance
            The instance containing the input data
        n_steps
            The number of steps used when calculating the attribution of each token.

        Returns
        -------
        attributions
            The attributions of the fields of record 1 followed by those of record 2

        Raises
        ------
        RuntimeError
            If the field encodings of the two records differ in size
        """

        def as_batched_tensor(key):
            # captum needs `torch.Tensor`s and we need a batch dimension (-> unsqueeze)
            return torch.from_numpy(single_forward_output[key]).unsqueeze(0)

        # 1. Turn the forward output into batched tensors
        encoded_1 = as_batched_tensor("field_encoded_record1")
        mask_1 = as_batched_tensor("record_mask_record1")

        encoded_2 = as_batched_tensor("field_encoded_record2")
        mask_2 = as_batched_tensor("record_mask_record2")

        logits = as_batched_tensor("logits")

        if encoded_1.size() != encoded_2.size():
            raise RuntimeError(
                "Both records must have the same number of data fields!")

        # 2. Get attributes
        integrated_gradients = IntegratedGradients(self._bimpm_forward)
        predicted_class = torch.argmax(logits)

        # Everything except the baselines is identical for both calls.
        shared_arguments = dict(
            inputs=(encoded_1, encoded_2),
            additional_forward_args=(mask_1, mask_2),
            target=predicted_class,
            return_convergence_delta=True,
            n_steps=n_steps,
        )
        # For record 1 both baselines are the encoding of record 2 ...
        ig_result_1 = integrated_gradients.attribute(
            baselines=(encoded_2, encoded_2), **shared_arguments)
        # ... and for record 2 both baselines are the encoding of record 1.
        ig_result_2 = integrated_gradients.attribute(
            baselines=(encoded_1, encoded_1), **shared_arguments)

        # NOTE: An earlier attempt to make the attributions for the "duplicate" case more meaningful
        # (integrating each record from the null vector with the target fixed to 0, and raising a
        # RuntimeError for non-binary tasks) did not work and is therefore not implemented here.

        # The returned deltas are not used.
        scores_1, _ = self._get_attributions_and_delta(ig_result_1, 0)
        scores_2, _ = self._get_attributions_and_delta(ig_result_2, 1)

        # 3. Get tokens corresponding to the attributions
        def joined_field_texts(arg_name):
            # One whitespace-joined string per text field of the record.
            return [
                " ".join(token.text for token in textfield.tokens)
                for textfield in instance.get(arg_name)
            ]

        def as_attributions(arg_name, texts, scores):
            # Each attribution spans the whole joined field text (0 .. len).
            return [
                Attribution(
                    text=text,
                    start=0,
                    end=len(text),
                    field=arg_name,
                    attribution=score,
                ) for text, score in zip(texts, scores)
            ]

        texts_1 = joined_field_texts(self._RECORD1_ARG_NAME_IN_FORWARD)
        texts_2 = joined_field_texts(self._RECORD2_ARG_NAME_IN_FORWARD)

        output_1 = as_attributions(self._RECORD1_ARG_NAME_IN_FORWARD, texts_1,
                                   scores_1)
        output_2 = as_attributions(self._RECORD2_ARG_NAME_IN_FORWARD, texts_2,
                                   scores_2)

        return output_1 + output_2