Example #1
 def test_get_single_embedding(self):
     model = get_bert_model()
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     text = "rare bird has more than enough charm to make it memorable."
     # _get_single_embedding returns both the embedded input and the parsed sentence.
     embedded_input, parsed_sentence = _get_single_embedding(model, text, device)
     assert embedded_input is not None
     assert parsed_sentence is not None
    def explain_local(self, X, y=None, name=None, num_iteration=150):
        """Explain the model by using MSRA's interpretor
        :param X: The text
        :type X: string
        :param y: The predicted label for the sentence
        :type y: string
        :param name: a name for saving the explanation, currently ignored
        :type str
        :param num_iteration: The number of iterations through the optimize function. This is a parameter
            that should be tuned to your dataset. If set to 0, all words will be important as the Loss function
            will not be optimzed. If set to a very high number, all words will not be important as the loss will
            be severly optimized. The more the iterations, slower the explanations.
        :type num_iteration: int
        :return: A model explanation object. It is guaranteed to be a LocalExplanation
        :rtype: DynamicLocalExplanation
        """
        X = _validate_X(X)

        embedded_input, parsed_sentence = _get_single_embedding(
            self.model, X, self.device)
        self.input_embeddings = embedded_input
        self.parsed_sentence = parsed_sentence

        self.input_size = self.input_embeddings.size(0)
        self.input_dimension = self.input_embeddings.size(1)
        # One learnable weight per token, tuned by _optimize below.
        self.ratio = nn.Parameter(torch.randn(self.input_size, 1),
                                  requires_grad=True)
        # Tensor.to() is not in-place, so the moved tensor must be reassigned.
        self.input_embeddings = self.input_embeddings.to(self.device)

        if self.regular is None:
            assert self.train_dataset is not None, "Training dataset is required"

            # sample the training dataset
            if len(self.train_dataset) <= self.max_points:
                sampled_train_dataset = self.train_dataset
            else:
                sampled_train_dataset = random.sample(self.train_dataset,
                                                      k=self.max_points)

            training_embeddings = make_bert_embeddings(sampled_train_dataset,
                                                       self.model, self.device)
            regularization = self._calculate_regularization(
                training_embeddings, self.model).tolist()
            self.regular = nn.Parameter(
                torch.tensor(regularization).to(self.input_embeddings),
                requires_grad=False,
            )
            self.Phi = self._generate_Phi(layer=self.target_layer)

        # values below are arbitrarily set for now
        self._optimize(num_iteration, lr=0.01, show_progress=True)
        local_importance_values = self._get_sigma()
        self.local_importance_values = local_importance_values
        # Drop the [CLS] and [SEP] tokens added by the BERT tokenizer.
        return _create_local_explanation(
            classification=True,
            text_explanation=True,
            local_importance_values=np.array(local_importance_values)[1:-1],
            method="neural network",
            model_task="classification",
            features=self.parsed_sentence[1:-1],
            classes=self.classes,
            predicted_label=y,
        )
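
Example usage (a minimal sketch, not the library's documented API: the explainer class name, its constructor arguments, and the data variables below are assumptions inferred from the attributes this method reads, such as self.model, self.train_dataset, self.device, and self.classes):

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = get_bert_model()  # helper shown in the test above

# Hypothetical explainer class exposing the explain_local method above.
explainer = UnifiedInformationExplainer(
    model=model,
    train_dataset=train_texts,  # assumed: a list of training sentences
    device=device,
    classes=["negative", "positive"],  # assumed: binary sentiment labels
)
explanation = explainer.explain_local(
    "rare bird has more than enough charm to make it memorable.",
    y="positive",
    num_iteration=150,
)
# Per-token importance scores, aligned with the explanation's features.
print(explanation.local_importance_values)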