def _compute_attributions(
    self,
    single_forward_output: Dict[str, numpy.ndarray],
    instance: Instance,
    n_steps: int = 50,
) -> List[List[Attribution]]:
    """Attributes the prediction to the input.

    The attributions are calculated by means of the
    [Integrated Gradients](https://arxiv.org/abs/1703.01365) method.

    Parameters
    ----------
    single_forward_output
        Non-batched forward output containing numpy arrays.
        The "embeddings", "mask" and "logits" entries are read.
    instance
        The instance containing the input data
    n_steps
        The number of steps used when calculating the attribution of each token.

    Returns
    -------
    attributions
        A list of lists of attributions; the nesting is due to the ListField level
    """
    # captum needs `torch.Tensor`s and we need a batch dimension (-> unsqueeze)
    embeddings = torch.from_numpy(single_forward_output["embeddings"]).unsqueeze(0)
    mask = torch.from_numpy(single_forward_output["mask"]).unsqueeze(0)
    logits = torch.from_numpy(single_forward_output["logits"]).unsqueeze(0)

    ig = IntegratedGradients(self._encoder_and_head_forward)
    # The convergence delta was never used, so it is not requested anymore;
    # `attribute` then returns only the attributions tensor.
    attributions = ig.attribute(
        embeddings,
        n_steps=n_steps,
        target=torch.argmax(logits),
        additional_forward_args=mask,
    )

    # Sum over dim 3 (presumably the embedding dimension -- TODO confirm)
    # and drop the batch dimension added above.
    attributions = attributions.sum(dim=3).squeeze(0)

    # Normalize by the overall norm. If every attribution is zero the norm is
    # zero as well and the division would turn all values into NaN, so the
    # (all-zero) attributions are left untouched in that case.
    norm = torch.norm(attributions)
    if norm > 0:
        attributions = attributions / norm
    attributions = attributions.detach().numpy()

    list_field = cast(ListField, instance.get(self.forward_arg_name))
    document_tokens = [
        cast(TextField, text_field).tokens for text_field in list_field
    ]

    # `zip` stops at the shorter sequence, so attribution entries without a
    # corresponding token (presumably padding positions) are dropped.
    return [
        [
            Attribution(
                text=token.text,
                start=token.idx,
                end=self._get_token_end(token),
                field=self.forward_arg_name,
                attribution=attribution,
            )
            for token, attribution in zip(sentence_tokens, sentence_attributions)
        ]
        for sentence_tokens, sentence_attributions in zip(
            document_tokens, attributions
        )
    ]
def explain_prediction(
    self, prediction: Dict[str, numpy.ndarray], instance: Instance, n_steps: int
) -> Dict[str, Any]:
    """Adds token attributions for a ListField input to the prediction.

    The attributions are computed with captum's `IntegratedGradients` on the
    text embeddings. Since the input is a `ListField` of `TextField`s, the
    mask and the embeddings are computed with one wrapping dimension.

    Parameters
    ----------
    prediction
        The prediction dictionary; the entry under `self.label_name` is used
        to look up the target label index.
    instance
        The instance containing the input data
    n_steps
        The number of steps passed to the integrated gradients computation.

    Returns
    -------
    prediction_dict
        A copy of `prediction` with an additional "explain" key that maps
        `self.forward_arg_name` to a list (one entry per text field) of lists
        of `{"token": ..., "attribution": ...}` dictionaries.
    """
    # ListField of TextFields -> one extra dimension wraps the token dimension
    num_wrapping_dims = 1

    input_tokens_ids = Batch([instance]).as_tensor_dict()
    ig = IntegratedGradients(self._explain_embeddings)

    list_field = cast(ListField, instance.get(self.forward_arg_name))
    document_tokens = [
        [token.text for token in cast(TextField, text_field).tokens]
        for text_field in list_field
    ]

    document_tensors = input_tokens_ids.get(self.forward_arg_name)
    mask = get_text_field_mask(
        document_tensors, num_wrapping_dims=num_wrapping_dims
    )
    text_embeddings = self.backbone.embedder.forward(
        document_tensors, num_wrapping_dims=num_wrapping_dims
    )

    label_id = vocabulary.index_for_label(
        self.backbone.vocab, prediction.get(self.label_name)
    )

    # The convergence delta was never used, so it is not requested anymore;
    # `attribute` then returns only the attributions tensor.
    attributions = ig.attribute(
        text_embeddings,
        target=label_id,
        additional_forward_args=mask,
        n_steps=n_steps,
    )

    # Sum over dim 3 (presumably the embedding dimension -- TODO confirm)
    # and drop the leading batch dimension.
    attributions = attributions.sum(dim=3).squeeze(0)

    # Normalize by the overall norm. If every attribution is zero the norm is
    # zero as well and the division would turn all values into NaN, so the
    # (all-zero) attributions are left untouched in that case.
    norm = torch.norm(attributions)
    if norm > 0:
        attributions = attributions / norm
    attributions = attributions.detach().numpy()

    # `zip` stops at the shorter sequence, so attribution entries without a
    # corresponding token (presumably padding positions) are dropped.
    explanation = [
        [
            {"token": token, "attribution": attribution}
            for token, attribution in zip(sentence_tokens, sentence_attribution)
        ]
        for sentence_tokens, sentence_attribution in zip(
            document_tokens, attributions
        )
    ]

    return {**prediction, "explain": {self.forward_arg_name: explanation}}
def explain_prediction(
    self, prediction: Dict[str, numpy.ndarray], instance: Instance, n_steps: int
) -> Dict[str, Any]:
    """Adds token attributions for a single TextField input to the prediction.

    The attributions are computed with captum's `IntegratedGradients` on the
    text embeddings of the field stored under `self.forward_arg_name`.

    Parameters
    ----------
    prediction
        The prediction dictionary; the first element of its "labels" entry is
        used to look up the target label index.
    instance
        The instance containing the input data
    n_steps
        The number of steps passed to the integrated gradients computation.

    Returns
    -------
    prediction_dict
        A copy of `prediction` with an additional "explain" key that maps
        `self.forward_arg_name` to a list of
        `{"token": ..., "attribution": ...}` dictionaries.
    """
    # Plain TextField -> no dimension wraps the token dimension
    num_wrapping_dims = 0

    input_tokens_ids = Batch([instance]).as_tensor_dict()
    ig = IntegratedGradients(self._explain_embeddings)

    text_field = cast(TextField, instance.get(self.forward_arg_name))
    text_tokens = [token.text for token in text_field.tokens]

    text_tensor = input_tokens_ids.get(self.forward_arg_name)
    mask = get_text_field_mask(text_tensor, num_wrapping_dims=num_wrapping_dims)
    text_embeddings = self.backbone.embedder.forward(
        text_tensor, num_wrapping_dims=num_wrapping_dims
    )

    label_id = vocabulary.index_for_label(
        self.backbone.vocab, prediction["labels"][0]
    )

    # The convergence delta was never used, so it is not requested anymore;
    # `attribute` then returns only the attributions tensor.
    attributions = ig.attribute(
        text_embeddings,
        n_steps=n_steps,
        target=label_id,
        additional_forward_args=mask,
    )

    # Sum over dim 2 (presumably the embedding dimension -- TODO confirm)
    # and drop the leading batch dimension.
    attributions = attributions.sum(dim=2).squeeze(0)

    # Normalize by the overall norm. If every attribution is zero the norm is
    # zero as well and the division would turn all values into NaN, so the
    # (all-zero) attributions are left untouched in that case.
    norm = torch.norm(attributions)
    if norm > 0:
        attributions = attributions / norm
    attributions = attributions.detach().numpy()

    explanation = [
        {"token": token, "attribution": attribution}
        for token, attribution in zip(text_tokens, attributions)
    ]

    return {**prediction, "explain": {self.forward_arg_name: explanation}}
def explain_prediction(self, prediction: Dict[str, np.array], instance: Instance, n_steps: int) -> Dict[str, Any]:
    """Calculates attributions for each data field in the record by integrating the gradients.

    IMPORTANT: The calculated attributions only make sense for a
    duplicate/not_duplicate binary classification task of the two records.

    Parameters
    ----------
    prediction
        The prediction dictionary; the argmax of its "probs" entry is used as
        the attribution target.
    instance
        The instance holding the two records.
    n_steps
        The number of steps passed to the integrated gradients computation.

    Returns
    -------
    prediction_dict
        The prediction dictionary with a newly added "explain" key

    Raises
    ------
    RuntimeError
        If the field encodings of the two records differ in size.
    """
    tokens_ids = Batch([instance]).as_tensor_dict()

    # 1. Get field encodings
    # TODO(dcfidalgo): optimize: for the prediction we already embedded and field encoded the records.
    #  Also, the forward passes here are always done on cpu!
    field_encoded_record1, record_mask_record1 = self._field_encoding(
        tokens_ids.get(self._RECORD1_ARG_NAME_IN_FORWARD)
    )
    field_encoded_record2, record_mask_record2 = self._field_encoding(
        tokens_ids.get(self._RECORD2_ARG_NAME_IN_FORWARD)
    )
    if field_encoded_record1.size() != field_encoded_record2.size():
        raise RuntimeError("Both records must have the same number of data fields!")

    # 2. Get attributes
    ig = IntegratedGradients(self._bimpm_forward)
    # Everything except the baselines is identical for both integrations.
    shared_kwargs = dict(
        inputs=(field_encoded_record1, field_encoded_record2),
        additional_forward_args=(record_mask_record1, record_mask_record2),
        target=int(np.argmax(prediction["probs"])),
        return_convergence_delta=True,
        n_steps=n_steps,
    )
    # record1 is integrated along the path record2 -> record1,
    # record2 stays fixed (its baseline equals its input) ...
    ig_attribute_record1 = ig.attribute(
        baselines=(field_encoded_record2, field_encoded_record2), **shared_kwargs
    )
    # ... and vice versa for record2.
    ig_attribute_record2 = ig.attribute(
        baselines=(field_encoded_record1, field_encoded_record1), **shared_kwargs
    )
    # NOTE: For the "duplicate" case (target 0) an attempt was made to get more
    # meaningful attributions by integrating each record from the null vector
    # (`torch.zeros_like`) with a fixed target of 0, assuming the null vector
    # provides the highest "not duplicate" score. It did not work.

    # The second argument is presumably the index of the record in the
    # `inputs` tuple -- TODO confirm against `_get_attributions_and_delta`.
    attributions_record1, _ = self._get_attributions_and_delta(ig_attribute_record1, 0)
    attributions_record2, _ = self._get_attributions_and_delta(ig_attribute_record2, 1)

    # 3. Get tokens corresponding to the attributions:
    # each data field is represented by its space-joined token texts.
    field_tokens_record1 = [
        " ".join([token.text for token in textfield.tokens])
        for textfield in instance.get(self._RECORD1_ARG_NAME_IN_FORWARD)
    ]
    field_tokens_record2 = [
        " ".join([token.text for token in textfield.tokens])
        for textfield in instance.get(self._RECORD2_ARG_NAME_IN_FORWARD)
    ]

    explain_record1 = [
        {"token": token, "attribution": attribution}
        for token, attribution in zip(field_tokens_record1, attributions_record1)
    ]
    explain_record2 = [
        {"token": token, "attribution": attribution}
        for token, attribution in zip(field_tokens_record2, attributions_record2)
    ]

    return {
        **prediction,
        "explain": {
            self._RECORD1_ARG_NAME_IN_FORWARD: explain_record1,
            self._RECORD2_ARG_NAME_IN_FORWARD: explain_record2,
        },
    }
def _compute_attributions(
    self,
    single_forward_output: Dict[str, numpy.ndarray],
    instance: Instance,
    n_steps: int = 50,
) -> List[Attribution]:
    """Computes attributions for each data field in the record by means of the
    [Integrated Gradients](https://arxiv.org/abs/1703.01365) method.

    IMPORTANT: The calculated attributions only make sense for a
    duplicate/not_duplicate binary classification task of the two records.

    Parameters
    ----------
    single_forward_output
        Non-batched forward output containing numpy arrays
    instance
        The instance containing the input data
    n_steps
        The number of steps used when calculating the attribution of each token.

    Returns
    -------
    attributions
        One attribution per data field: first the ones of record1,
        followed by the ones of record2.

    Raises
    ------
    RuntimeError
        If the field encodings of the two records differ in size.
    """

    def as_batched_tensor(key: str) -> torch.Tensor:
        # captum needs `torch.Tensor`s and we need a batch dimension (-> unsqueeze)
        return torch.from_numpy(single_forward_output[key]).unsqueeze(0)

    # 1. Get the field encodings, masks and logits from the forward output
    field_encoded_record1 = as_batched_tensor("field_encoded_record1")
    record_mask_record1 = as_batched_tensor("record_mask_record1")
    field_encoded_record2 = as_batched_tensor("field_encoded_record2")
    record_mask_record2 = as_batched_tensor("record_mask_record2")
    logits = as_batched_tensor("logits")

    if field_encoded_record1.size() != field_encoded_record2.size():
        raise RuntimeError("Both records must have the same number of data fields!")

    # 2. Get attributes
    ig = IntegratedGradients(self._bimpm_forward)
    # Everything except the baselines is identical for both integrations.
    shared_kwargs = dict(
        inputs=(field_encoded_record1, field_encoded_record2),
        additional_forward_args=(record_mask_record1, record_mask_record2),
        target=torch.argmax(logits),
        return_convergence_delta=True,
        n_steps=n_steps,
    )
    # record1 is integrated along the path record2 -> record1,
    # record2 stays fixed (its baseline equals its input) ...
    ig_attribute_record1 = ig.attribute(
        baselines=(field_encoded_record2, field_encoded_record2), **shared_kwargs
    )
    # ... and vice versa for record2.
    ig_attribute_record2 = ig.attribute(
        baselines=(field_encoded_record1, field_encoded_record1), **shared_kwargs
    )
    # NOTE: For the "duplicate" case (target 0) an attempt was made to get more
    # meaningful attributions by integrating each record from the null vector
    # (`torch.zeros_like`) with a fixed target of 0, assuming the null vector
    # provides the highest "not duplicate" score. It did not work.

    # The second argument is presumably the index of the record in the
    # `inputs` tuple -- TODO confirm against `_get_attributions_and_delta`.
    attributions_record1, _ = self._get_attributions_and_delta(ig_attribute_record1, 0)
    attributions_record2, _ = self._get_attributions_and_delta(ig_attribute_record2, 1)

    # 3. Get tokens corresponding to the attributions:
    # each data field is represented by its space-joined token texts.
    field_text_record1 = [
        " ".join([token.text for token in textfield.tokens])
        for textfield in instance.get(self._RECORD1_ARG_NAME_IN_FORWARD)
    ]
    field_text_record2 = [
        " ".join([token.text for token in textfield.tokens])
        for textfield in instance.get(self._RECORD2_ARG_NAME_IN_FORWARD)
    ]

    # Each attribution spans the whole field text (start=0, end=len(text)).
    output_record1 = [
        Attribution(
            text=field_text,
            start=0,
            end=len(field_text),
            field=self._RECORD1_ARG_NAME_IN_FORWARD,
            attribution=attribution,
        )
        for field_text, attribution in zip(field_text_record1, attributions_record1)
    ]
    output_record2 = [
        Attribution(
            text=field_text,
            start=0,
            end=len(field_text),
            field=self._RECORD2_ARG_NAME_IN_FORWARD,
            attribution=attribution,
        )
        for field_text, attribution in zip(field_text_record2, attributions_record2)
    ]

    return [*output_record1, *output_record2]