def test(self, df_test, verbosity=2):
    """Calculate and store the average classification accuracy on the
    test set (self.avg_accuracy).

    :param df_test: dataframe containing test data labels, tokens, masks,
        and counts
    :type df_test: pd.DataFrame
    :param verbosity: {0, 1, 2}, default 2
        If 0, does not log any output.
        If greater than 0, logs train and test accuracy.
    :type verbosity: int, optional
    """
    self.model.eval()
    accuracy = 0

    for i in range(len(df_test) // self.args.test_batch_size):
        test_batch = df_test.iloc[i * self.args.test_batch_size:
                                  (i + 1) * self.args.test_batch_size]
        batch_dict = generate_data(test_batch, self.args.cuda)
        batch_x_ = batch_dict["x"]
        batch_m_ = batch_dict["m"]
        batch_y_ = batch_dict["y"]
        predict, _, _ = self.model(batch_x_, batch_m_)

        # take the argmax over the predicted class scores
        _, y_pred = torch.max(predict, dim=1)
        accuracy += (y_pred == batch_y_).sum().item()

    self.avg_accuracy = accuracy / len(df_test)
    self.test_accs.append(self.avg_accuracy)

    if verbosity > 0:
        logging.info("train acc: %.4f, test acc: %.4f" %
                     (self.train_accs[-1], self.avg_accuracy))

    if self.args.save_best_model and self.avg_accuracy > self.best_test_acc:
        logging.info("saving best classifier model and model stats")
        torch.save(
            self.model.state_dict(),
            os.path.join(
                self.args.model_folder_path,
                self.args.model_prefix + "gen_classifier.pth",
            ),
        )

    if self.avg_accuracy > self.best_test_acc:
        self.best_test_acc = self.avg_accuracy
        self.epochs_since_improv = 0
    else:
        self.epochs_since_improv += 1
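
# A minimal usage sketch (hedged: `clf`, `df_test` are illustrative names
# for an initialized classifier wrapper and a preprocessed test dataframe):
#
#     clf.test(df_test, verbosity=1)
#     print(clf.avg_accuracy, clf.best_test_acc)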
def explain_local(self, text: str, **kwargs) -> _create_local_explanation:
    """Create a local explanation for a given text.

    :param text: a segment of text
    :type text: str
    :param kwargs:
        preprocessor: an initialized preprocessor with .preprocess() and
            .decode_single() methods used to tokenize the given text,
            e.g. GlovePreprocessor or BertPreprocessor
        hard_importances: whether to generate "hard" important/non-important
            rationales or float rationale scores, defaults to True
    :return: local explanation object
    :rtype: DynamicLocalExplanation
    """
    # honor the documented defaults instead of raising KeyError when the
    # kwargs are omitted
    preprocessor = kwargs.get("preprocessor", self.preprocessor)
    hard_importances = kwargs.get("hard_importances", True)

    model_args = self.model_config
    df_dummy_label = pd.DataFrame.from_dict({"labels": [0]})
    df_sentence = pd.concat(
        [df_dummy_label, self.preprocessor.preprocess([text.lower()])],
        axis=1)

    batch_dict = generate_data(df_sentence, self.model_config.cuda)
    x = batch_dict["x"]
    m = batch_dict["m"]
    predict_dict = self.predict(df_sentence)
    zs = predict_dict["rationale"]
    prediction = predict_dict["predict"]
    prediction_idx = prediction[0].max(0)[1]
    prediction = model_args.labels[prediction_idx]
    zs = np.array(zs.cpu())

    if not hard_importances:
        float_zs = self.model.get_z_scores(df_sentence)
        float_zs = float_zs[:, :, 1].detach()
        float_zs = np.array(float_zs.cpu())
        # zero out the importances of all words not selected as part of
        # the rationale
        zs = zs * float_zs

    # generate human-readable tokens (individual words)
    seq_len = int(m.sum().item())
    ids = x[:seq_len][0]
    tokens = preprocessor.decode_single(ids)

    local_explanation = _create_local_explanation(
        classification=True,
        text_explanation=True,
        local_importance_values=zs.flatten(),
        method=str(type(self.model)),
        model_task="classification",
        features=tokens,
        classes=model_args.labels,
        predicted_label=prediction,
    )
    return local_explanation
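
# Illustrative call (a sketch: `explainer` and `preproc` are assumed names
# for an initialized explainer and a GlovePreprocessor/BertPreprocessor):
#
#     explanation = explainer.explain_local(
#         "this film was a delight",
#         preprocessor=preproc,
#         hard_importances=False,
#     )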
def fit(self, df_train, df_test):
    """Train the classifier on the training data, with testing at the end
    of every epoch.

    :param df_train: training data containing labels, lists of word token
        ids, pad/word masks, and token counts for each training example
    :type df_train: pd.DataFrame
    :param df_test: testing data containing labels, lists of word token
        ids, pad/word masks, and token counts for each testing example
    :type df_test: pd.DataFrame
    """
    self.init_optimizer()

    total_train = len(df_train)
    indices = np.array(list(range(0, total_train)))

    for _ in tqdm(range(self.num_epochs)):
        self.model.train()  # pytorch fn; sets module to train mode

        # shuffle the examples for this epoch
        np.random.shuffle(indices)
        total_train_acc = 0
        for i in range(total_train // self.args.train_batch_size):
            # sample a batch of data
            start = i * self.args.train_batch_size
            end = min((i + 1) * self.args.train_batch_size, total_train)
            batch = df_train.loc[indices[start:end]]
            batch_dict = generate_data(batch, self.args.cuda)
            batch_x_ = batch_dict["x"]
            batch_m_ = batch_dict["m"]
            batch_y_ = batch_dict["y"]

            losses, predict = self._train_one_step(batch_x_, batch_y_,
                                                   batch_m_)

            # calculate classification accuracy
            _, y_pred = torch.max(predict, dim=1)
            acc = float((y_pred == batch_y_).sum().cpu().data.item())
            total_train_acc += acc

        total_acc_percent = total_train_acc / total_train
        self.train_accs.append(total_acc_percent)

        self.test(df_test)

        # stop training if there have been no improvements
        if self.epochs_since_improv > self.args.training_stop_thresh:
            break
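
# Typical entry point (sketch; `clf`, `df_train`, `df_test` are placeholder
# names for an initialized classifier and preprocessed train/test frames):
#
#     clf.fit(df_train, df_test)
#     # per-epoch accuracies accumulate in clf.train_accs / clf.test_accs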
def get_z_scores(self, df_test):
    """Get softmaxed rationale importances.

    :param df_test: dataframe containing test data labels, tokens, masks,
        and counts
    :type df_test: pd.DataFrame
    :return: z_scores: softmaxed rationale scores with dimension
        (batch_size, length)
    :rtype: torch.FloatTensor
    """
    batch_dict = generate_data(df_test, self.use_cuda)
    x_tokens = batch_dict["x"]
    mask = batch_dict["m"]
    z_scores, _, _ = self.generator(x_tokens, mask)
    z_scores = F.softmax(z_scores, dim=-1)

    return z_scores
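
# Sketch (assumes `model` is this module and `df_test` a preprocessed
# dataframe); column 1 of the softmax output is used elsewhere in this
# file as the per-token probability of being selected for the rationale:
#
#     z_scores = model.get_z_scores(df_test)
#     token_probs = z_scores[:, :, 1]  # shape: (batch_size, length)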
def predict(self, df_predict):
    """Generate rationales, predictions using rationales, predictions
    using anti-rationales (complements of generated rationales), and
    introspective generator classifier predictions for given examples.

    :param df_predict: data containing labels, lists of word token ids,
        pad/word masks, and token counts for each testing example
    :type df_predict: pd.DataFrame
    :return: dictionary with fields:
        "predict": predictions using generated rationales,
        "anti_predict": predictions using complements of generated
        rationales,
        "cls_predict": predictions from the introspective generator,
        "rationale": mask indicating whether words were used in rationales
    :rtype: dict
    """
    self.model.eval()
    self.model.training = False

    batch_dict = generate_data(df_predict, self.model_config.cuda)
    batch_x_ = batch_dict["x"]
    batch_m_ = batch_dict["m"]
    forward_dict = self.model.forward(batch_x_, batch_m_)
    predict = forward_dict["predict"]
    anti_predict = forward_dict["anti_predict"]
    cls_predict = forward_dict["cls_predict"]
    z = forward_dict["z"]
    predict = predict.detach()
    anti_predict = anti_predict.detach()
    cls_predict = cls_predict.detach()
    z = z.detach()
    predict_dict = {
        "predict": predict,
        "anti_predict": anti_predict,
        "cls_predict": cls_predict,
        "rationale": z,
    }
    self.model.training = True
    return predict_dict
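
# Example of reading the returned fields (field names as documented above;
# `predictor` and `df` are placeholder names):
#
#     out = predictor.predict(df)
#     _, y_pred = torch.max(out["predict"], dim=1)  # rationale-based labels
#     rationale_mask = out["rationale"]             # 1 = word in rationale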
def fit(self, df_train, df_test):
    """Train the model on the training data, with testing at the end of
    every epoch.

    :param df_train: training data containing labels, lists of word token
        ids, pad/word masks, and token counts for each training example
    :type df_train: pd.DataFrame
    :param df_test: testing data containing labels, lists of word token
        ids, pad/word masks, and token counts for each testing example
    :type df_test: pd.DataFrame
    """
    self._init_optimizers()
    self._init_rl_optimizers()

    total_train = len(df_train)
    indices = np.array(list(range(0, total_train)))

    for _ in tqdm(range(self.num_epochs)):
        self.train()  # pytorch fn; sets module to train mode

        # shuffle the data in this epoch
        np.random.shuffle(indices)
        total_train_acc = 0
        for i in range(total_train // self.train_batch_size):
            # sample a batch of data
            start = i * self.train_batch_size
            end = min((i + 1) * self.train_batch_size, total_train)
            batch = df_train.loc[indices[start:end]]
            batch_dict = generate_data(batch, self.use_cuda)
            batch_x_ = batch_dict["x"]
            batch_m_ = batch_dict["m"]
            batch_y_ = batch_dict["y"]

            # the baseline for the policy-gradient update is the running
            # mean of recent rationale rewards
            z_baseline = Variable(
                torch.FloatTensor([float(np.mean(self.z_history_rewards))]))
            if self.use_cuda:
                z_baseline = z_baseline.cuda()

            losses, predict, anti_predict, cls_predict, z, z_rewards = \
                self._train_one_step(batch_x_, batch_y_, z_baseline,
                                     batch_m_)

            z_batch_reward = np.mean(z_rewards.cpu().data.numpy())
            self.z_history_rewards.append(z_batch_reward)

            # calculate classification accuracy
            _, y_pred = torch.max(predict, dim=1)
            acc = float((y_pred == batch_y_).sum().cpu().data.item())
            total_train_acc += acc

        total_acc_percent = total_train_acc / total_train
        self.train_accs.append(total_acc_percent)

        self.test(df_test)

        # stop training if there have been no improvements
        if self.epochs_since_improv > self.training_stop_thresh:
            break
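
# Sketch of the variance-reduction baseline used above (illustrative values;
# whether z_history_rewards is a deque is an assumption): subtracting a
# running-mean baseline from the reward leaves the policy-gradient estimate
# unbiased while reducing its variance.
#
#     from collections import deque
#     z_history_rewards = deque([0.1, 0.3, 0.2], maxlen=200)
#     z_baseline = torch.FloatTensor([float(np.mean(z_history_rewards))])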
def test(self, df_test, verbosity=2):
    """Calculate and store as model attributes:
    Average classification accuracy using rationales (self.avg_accuracy),
    Average classification accuracy using rationale complements
    (self.avg_anti_accuracy),
    Average sparsity of rationales (self.avg_sparsity),
    Average continuity of rationales (self.avg_continuity)

    :param df_test: dataframe containing test data labels, tokens, masks,
        and counts
    :type df_test: pd.DataFrame
    :param verbosity: {0, 1, 2}, default 2
        If 0, does not log any output.
        If 1, logs accuracy, anti-rationale accuracy, sparsity, and
        continuity scores.
        If 2, additionally displays a random test example with rationale
        and classification.
    :type verbosity: int, optional
    """
    self.eval()
    accuracy = 0
    anti_accuracy = 0
    sparsity_total = 0
    cont_total = 0

    for i in range(len(df_test) // self.test_batch_size):
        test_batch = df_test.iloc[i * self.test_batch_size:
                                  (i + 1) * self.test_batch_size]
        batch_dict = generate_data(test_batch, self.use_cuda)
        batch_x_ = batch_dict["x"]
        batch_m_ = batch_dict["m"]
        batch_y_ = batch_dict["y"]
        forward_dict = self.forward(batch_x_, batch_m_)
        predict = forward_dict["predict"]
        anti_predict = forward_dict["anti_predict"]
        z = forward_dict["z"]

        # take the argmax over the predicted class scores
        _, y_pred = torch.max(predict, dim=1)
        _, anti_y_pred = torch.max(anti_predict, dim=1)
        accuracy += (y_pred == batch_y_).sum().item()
        anti_accuracy += (anti_y_pred == batch_y_).sum().item()

        # calculate sparsity and continuity of the rationales
        sparsity_ratios = self._get_sparsity(z, batch_m_)
        sparsity_total += sparsity_ratios.sum().item()
        cont_ratios = self._get_continuity(z, batch_m_)
        cont_total += cont_ratios.sum().item()

    self.avg_accuracy = accuracy / len(df_test)
    self.test_accs.append(self.avg_accuracy)
    self.avg_anti_accuracy = anti_accuracy / len(df_test)
    self.avg_sparsity = sparsity_total / len(df_test)
    self.avg_continuity = cont_total / len(df_test)

    if verbosity > 0:
        logging.info("test acc: %.4f test anti acc: %.4f" %
                     (self.avg_accuracy, self.avg_anti_accuracy))
        logging.info("test sparsity: %.4f test continuity: %.4f" %
                     (self.avg_sparsity, self.avg_continuity))
    if verbosity > 1:
        # display a random example from the last batch
        rand_idx = random.randint(0, self.test_batch_size - 1)
        logging.info("Gold Label: " + str(batch_y_[rand_idx].item()) +
                     " Pred label: " + str(y_pred[rand_idx].item()))
        logging.info(
            self.display_example(batch_x_[rand_idx], batch_m_[rand_idx],
                                 z[rand_idx]))

    if self.args.save_best_model and self.avg_accuracy > self.best_test_acc:
        logging.info("saving best model and model stats")
        torch.save(
            self.state_dict(),
            os.path.join(
                self.args.model_folder_path,
                self.args.model_prefix + ".pth",
            ),
        )

    if self.avg_accuracy > self.best_test_acc:
        self.best_test_acc = self.avg_accuracy
        self.epochs_since_improv = 0
    else:
        self.epochs_since_improv += 1
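
# Reading the stored metrics after an evaluation pass (sketch; `model` is
# a placeholder name for this module):
#
#     model.test(df_test, verbosity=0)
#     print(model.avg_accuracy, model.avg_anti_accuracy)
#     print(model.avg_sparsity, model.avg_continuity)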
def explain_local(self, X, y=None, name=None) -> LocalExplanation:
    """Create a local explanation for a given text.

    :param X: string to be explained
    :type X: str
    :param y: the ground truth label for the sentence
    :type y: str
    :param name: a name for saving the explanation, currently ignored
    :type name: str
    :return: local explanation object
    :rtype: DynamicLocalExplanation
    """
    X = _validate_X(X)

    model_args = self.model_config
    df_dummy_label = pd.DataFrame.from_dict({"labels": [0]})
    df_sentence = pd.concat(
        [df_dummy_label, self.preprocessor.preprocess([X.lower()])],
        axis=1)

    batch_dict = generate_data(df_sentence, self.model_config.cuda)
    x = batch_dict["x"]
    m = batch_dict["m"]
    predict_dict = self.predict(df_sentence)
    zs = predict_dict["rationale"]
    prediction = predict_dict["predict"]
    prediction_idx = prediction[0].max(0)[1]
    prediction = model_args.labels[prediction_idx]
    zs = np.array(zs.cpu())

    # float importance scores are always computed here; the
    # hard_importances=False branch is implied, since the token ids below
    # are decoded against the float-scored rationale
    float_zs = self.model.get_z_scores(df_sentence)
    float_zs = float_zs[:, :, 1].detach()
    float_zs = np.array(float_zs.cpu())
    # zero out the importances of all words not selected as part of the
    # rationale
    zs = zs * float_zs

    # generate human-readable tokens (individual words)
    seq_len = int(m.sum().item())
    ids = x[:seq_len][0]
    tokens = self.preprocessor.decode_single(ids)

    local_importance_values = zs.flatten()

    # post-processing for BERT to remove SEP and CLS tokens
    # TODO: might we want to add a "post-process" method to the
    # preprocessor?
    tokens_to_remove = [BertTokens.SEP, BertTokens.CLS]
    token_indexes = [
        idx for idx, token in enumerate(tokens) if token in tokens_to_remove
    ]
    if token_indexes:
        local_importance_values = np.delete(local_importance_values,
                                            token_indexes)
        for token_index in sorted(token_indexes, reverse=True):
            del tokens[token_index]

    local_explanation = _create_local_explanation(
        classification=True,
        text_explanation=True,
        local_importance_values=local_importance_values,
        method=str(type(self.model)),
        model_task="classification",
        features=tokens,
        classes=model_args.labels,
        predicted_label=prediction,
    )
    return local_explanation
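
# Illustrative call for the BERT-based explainer (names are placeholders;
# the explainer is assumed to have been constructed with a BertPreprocessor):
#
#     explanation = explainer.explain_local("this film was a delight")
#     # SEP/CLS tokens have already been stripped from the explanation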