def test_get_single_embedding(self):
    """Check that embedding a single sentence yields an embedding and tokens.

    ``_get_single_embedding`` returns a pair (its caller ``explain_local``
    unpacks it as ``embedded_input, parsed_sentence``), so both elements are
    unpacked and checked individually. Asserting on the returned tuple itself
    would always pass, because a tuple is never ``None``.
    """
    model = get_bert_model()
    # Run on the GPU when one is available, otherwise fall back to the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    text = "rare bird has more than enough charm to make it memorable."
    embedded_input, parsed_sentence = _get_single_embedding(
        model, text, device
    )
    assert embedded_input is not None
    assert parsed_sentence is not None
def explain_local(self, X, y=None, name=None, num_iteration=150):
    """Explain the model's prediction for one text using MSRA's interpreter.

    :param X: The text
    :type X: string
    :param y: The predicted label for the sentence
    :type y: string
    :param name: a name for saving the explanation, currently ignored
    :type name: str
    :param num_iteration: The number of iterations through the optimize
        function. This is a parameter that should be tuned to your dataset.
        If set to 0, all words will be important as the loss function will
        not be optimized. If set to a very high number, all words will not
        be important as the loss will be severely optimized. The more the
        iterations, the slower the explanations.
    :type num_iteration: int
    :return: A model explanation object. It is guaranteed to be a
        LocalExplanation
    :rtype: DynamicLocalExplanation
    """
    X = _validate_X(X)
    embedded_input, parsed_sentence = _get_single_embedding(
        self.model, X, self.device
    )
    # Tensor.to() is not in-place: the returned tensor must be stored,
    # otherwise the move to self.device is silently discarded.
    self.input_embeddings = embedded_input.to(self.device)
    self.parsed_sentence = parsed_sentence
    self.input_size = self.input_embeddings.size(0)
    self.input_dimension = self.input_embeddings.size(1)
    self.ratio = nn.Parameter(
        torch.randn(self.input_size, 1), requires_grad=True
    )

    # The regularization term is computed once and reused on later calls.
    if self.regular is None:
        assert self.train_dataset is not None, "Training dataset is required"
        # Sample the training dataset so at most max_points items are embedded.
        if len(self.train_dataset) <= self.max_points:
            sampled_train_dataset = self.train_dataset
        else:
            sampled_train_dataset = random.sample(
                self.train_dataset, k=self.max_points
            )
        training_embeddings = make_bert_embeddings(
            sampled_train_dataset, self.model, self.device
        )
        regularization = self._calculate_regularization(
            training_embeddings, self.model
        ).tolist()
        # .to(tensor) matches the dtype and device of the input embeddings.
        self.regular = nn.Parameter(
            torch.tensor(regularization).to(self.input_embeddings),
            requires_grad=False,
        )

    self.Phi = self._generate_Phi(layer=self.target_layer)
    # Values below are arbitrarily set for now.
    self._optimize(num_iteration, lr=0.01, show_progress=True)
    local_importance_values = self._get_sigma()
    # The full (unsliced) values are kept on the instance.
    self.local_importance_values = local_importance_values

    # The first and last entries of both the importance values and the parsed
    # sentence are dropped so the two stay aligned.
    # NOTE(review): presumably these are the tokenizer's boundary tokens
    # (e.g. [CLS]/[SEP]) - confirm against _get_single_embedding.
    return _create_local_explanation(
        classification=True,
        text_explanation=True,
        local_importance_values=np.array(local_importance_values)[1:-1],
        method="neural network",
        model_task="classification",
        features=self.parsed_sentence[1:-1],
        classes=self.classes,
        predicted_label=y,
    )