def __init__(self, model_path=None, cuda_device=1):
    """Load the predictor and build the LIME and gradient explainers.

    Args:
        model_path: Location handed to ``Predictor.from_path``. Any falsy
            value falls back to ``ROBERTA_MODEL_PATH``.
        cuda_device: Device index forwarded to ``Predictor.from_path``.
    """
    archive_path = model_path if model_path else ROBERTA_MODEL_PATH
    self.predictor = Predictor.from_path(archive_path, cuda_device=cuda_device)

    wordpiece_tokenizer = PretrainedTransformerTokenizer(
        model_name="roberta-base",
        max_length=TRANSFORMER_WORDPIECE_LIMIT,
    )

    # Translate the vocabulary's label tokens ("0"/"1") into display names,
    # ordered by label index so they line up with the model output.
    readable_names = {"0": "Negative", "1": "Positive"}
    model = self.predictor._model
    namespace = model._label_namespace
    class_names = []
    for label_index in (0, 1):
        index_to_token = model.vocab.get_index_to_token_vocabulary(namespace)
        class_names.append(readable_names[index_to_token.get(label_index)])

    def strip_separators(text):
        """Tokenize ``text``, remove the Ġ/Ċ/ĉ markers, drop first and last token."""
        # presumably the dropped first/last tokens are the sequence
        # delimiters added by the tokenizer -- TODO confirm
        pieces = [
            token.text.replace("Ġ", "").replace('Ċ', '').replace('ĉ', "")
            for token in wordpiece_tokenizer.tokenize(text)
        ]
        return pieces[1:-1]

    self.tokenizer = strip_separators
    self.explainer_lime = LimeTextExplainer(
        class_names=class_names,
        split_expression=self.tokenizer,
    )
    self.explainer_integrate = IntegratedGradient(self.predictor)
    self.explainer_simple = SimpleGradient(self.predictor)
def run(self, input_text, print_results=True):
    """Classify ``input_text`` and explain the classification with LIME.

    Args:
        input_text: Text passed to ``self.classify_text`` and to LIME.
        print_results: When true, print the classes, the scores and the
            explanation for label 0.

    Returns:
        The explanation object returned by ``explain_instance``.
    """
    output = self.classify_text(input_text, True)
    size = len(output)
    # NOTE(review): both slices stop one element short of their half of
    # ``output`` -- confirm against the layout classify_text returns.
    classes = output[0:int((size / 2) - 1)]
    scores = output[int((size / 2)):size - 1]

    if print_results:
        print('Intents: ' + '\t'.join(str(item) for item in classes))
        print('Scores: ' + '\t'.join(str(item) for item in scores))

    lime_explainer = LimeTextExplainer(class_names=classes)
    exp = lime_explainer.explain_instance(
        input_text,
        self.classify_text,
        num_features=7,
        top_labels=3,
        num_samples=1000,
    )

    if not print_results:
        return exp

    # NOTE(review): label 0 is requested although only the top 3 labels are
    # explained -- confirm label 0 is always among them.
    print("")
    print('Explanation for class %s' % classes[0])
    print('\n'.join(str(pair) for pair in exp.as_list(label=0)))
    return exp
def explain_one_example(self, idx=None, num_features=5, print_out=True):
    '''Explain the prediction for a single datapoint with LIME.

    When no index is given, one is drawn at random from the index of
    ``self.for_explanation``. Optionally prints the explanation.

    # Arguments:
        idx: index of a datapoint in ``self.for_explanation`` (default=None)
        num_features: int, number of explanatory features (default=5)
        print_out: boolean, print and plot the explanation (default=True)

    # Returns:
        exp: the explanation returned by ``explain_instance``
    '''
    if idx is None:
        idx = np.random.choice(self.for_explanation.index)

    lime_explainer = LimeTextExplainer(class_names=self.class_names)
    sample = self.for_explanation[idx]
    exp = lime_explainer.explain_instance(
        sample,
        self.predict_proba,
        num_features=num_features,
    )

    if not print_out:
        return exp

    print('Tweet {}: {}'.format(idx, sample))
    print(self.predict_proba([sample]))
    print(exp.as_pyplot_figure())
    plt.show()
    return exp
def classifier(request, format=None):
    """Classify the request's ``description`` text and explain it with LIME.

    The classifier (``clf``) and vectorizer (``tfidf``) are read through
    ``Pickle.objects``, chained into a pipeline, and LIME explains the
    pipeline's ``predict_proba`` for labels 0-3 with six features each.

    Args:
        request: Request whose ``data`` mapping may carry ``description``.
        format: Not used by this function.

    Returns:
        A ``Response`` with an error payload and HTTP 404 when the
        description is empty, otherwise the prediction payload with HTTP 201.
    """
    model = Pickle.objects.get(name='clf').pickled_model
    vectorizer = Pickle.objects.get(name='tfidf').pickled_model

    # NOTE(review): a missing key yields the literal text 'ERROR', which is
    # then classified like real input -- confirm this is intended.
    input_text = request.data.get('description', 'ERROR')
    if not input_text:
        return Response(
            {'error': 'Input is an empty string'},
            status=status.HTTP_404_NOT_FOUND,
        )

    standardized_text = standardize_text(input_text)
    explainer = LimeTextExplainer(class_names=GRADE_CATEGORIES)
    pipeline = make_pipeline(vectorizer, model)
    exp = explainer.explain_instance(
        standardized_text,
        pipeline.predict_proba,
        num_features=6,
        labels=[0, 1, 2, 3],
    )

    predict_probas = dict(zip(exp.class_names, exp.predict_proba))
    # Class name with the highest probability
    prediction = max(predict_probas, key=predict_probas.get)

    features_by_class = {}
    for lbl in exp.available_labels():
        features_by_class[exp.class_names[lbl]] = exp.as_list(label=lbl)

    payload = {
        'final_prediction': prediction,
        'ordered_class_names': exp.class_names,
        'predict_probas': predict_probas,
        'as_list': features_by_class,
        'standardized_text': standardized_text,
    }
    return Response(payload, status=status.HTTP_201_CREATED)
def finalExplain_n(codes):
    """Generate a comment for each code snippet and explain it per keyword.

    For every entry of ``codes`` the snippet is passed to ``translate``; the
    returned tokens after the first one, up to (not including) ``'</s>'``,
    form the comment. RAKE ranks the comment's key phrases and LIME is run
    once per phrase with ``predictorLime``.

    Args:
        codes: Iterable of code snippets.

    Returns:
        list[dict]: One dict per snippet with the keys ``code``, ``comment``,
        ``commentKeywords`` and ``explanations``. Each explanation holds
        ``commentKeyword`` and ``lime``, the ``(feature, weight)`` list.
    """
    # The current phrase is published through the module-level ``key``.
    # presumably predictorLime reads it -- verify against that function
    global key

    resData = []
    rake = Rake()
    explainer = LimeTextExplainer(class_names=['negative', 'positive'])

    for code in codes:
        tokens = translate(code)
        # Skip the leading token and stop at the end-of-sequence marker
        words = []
        for token in tokens[1:]:
            if token == '</s>':
                break
            words.append(token)
        # Every word keeps its trailing space, as before
        comment = ''.join(word + ' ' for word in words)

        rake.extract_keywords_from_text(comment)
        phrases = rake.get_ranked_phrases()

        explanations = []
        for phrase in phrases:
            key = phrase
            explanation = explainer.explain_instance(code, predictorLime, num_features=6)
            weights = explanation.as_list()
            print(weights)
            # Store the evaluated list; the bound method was stored before
            explanations.append({'commentKeyword': phrase, 'lime': weights})

        resData.append({
            'code': code,
            'comment': comment,
            'commentKeywords': phrases,
            'explanations': explanations,
        })
    return resData
def get_lime(model, test_tokens, model_name):
    """Compute LIME word weights for every text in ``test_tokens``.

    Each text is explained with as many features as it has
    whitespace-separated tokens, using 1000 samples. A diagnostic line is
    printed when the number of explained features differs from the number of
    distinct tokens, and a progress line every 10 texts.

    Args:
        model: Forwarded to ``wrapper_clf_predict`` as ``model``.
        test_tokens: Iterable of whitespace-tokenised strings.
        model_name: Forwarded to ``wrapper_clf_predict`` as ``model_name``.

    Returns:
        tuple[list[str], list[list[float]]]: Per text, the explained words
        joined by single spaces and their weights in the same order. A text
        with an empty explanation yields ``''`` and ``[]``.
    """
    explainer = LimeTextExplainer(class_names=["genuine", "deceptive"],
                                  split_expression=r'\s+')
    # The prediction callable does not depend on the text; build it once
    predict_fn = partial(wrapper_clf_predict, model=model, model_name=model_name)

    explained = []
    for idx, text in enumerate(test_tokens):
        tokens = text.split()
        exp = explainer.explain_instance(text,
                                         predict_fn,
                                         num_features=len(tokens),
                                         num_samples=1000)
        weights = exp.as_list()
        distinct_tokens = len(set(tokens))
        if distinct_tokens != len(weights):
            print(idx, distinct_tokens, len(dict(weights)))
        explained.append(dict(weights))
        if (idx + 1) % 10 == 0:
            print('{} instances have been processed..'.format(idx + 1))

    # The joined string is built per document, so an empty explanation can no
    # longer reuse the previous document's features.
    # Absolute values of the scores should be taken subsequently.
    features_l = [' '.join(word_weights) for word_weights in explained]
    scores_l = [list(word_weights.values()) for word_weights in explained]
    return features_l, scores_l
def test_lime_text_tabular_not_equal_random_state(self):
    """Explainers seeded with different random states must disagree."""
    categories = ['alt.atheism', 'soc.religion.christian']
    train = fetch_20newsgroups(subset='train', categories=categories)
    test = fetch_20newsgroups(subset='test', categories=categories)
    class_names = ['atheism', 'christian']

    vectorizer = TfidfVectorizer(lowercase=False)
    train_vectors = vectorizer.fit_transform(train.data)
    test_vectors = vectorizer.transform(test.data)

    nb = MultinomialNB(alpha=.01)
    nb.fit(train_vectors, train.target)
    pred = nb.predict(test_vectors)
    f1_score(test.target, pred, average='weighted')
    pipeline = make_pipeline(vectorizer, nb)

    def explain_with_seed(seed):
        """Explain test document 83 with an explainer seeded by ``seed``."""
        seeded = LimeTextExplainer(class_names=class_names, random_state=seed)
        return seeded.explain_instance(test.data[83],
                                       pipeline.predict_proba,
                                       num_features=6)

    exp_1 = explain_with_seed(10)
    exp_2 = explain_with_seed(20)

    maps_match = exp_1.as_map() == exp_2.as_map()
    self.assertFalse(maps_match)
def get_result_per_word(self, text, num_samples):
    """Return, for each explained word, its relevance to every intent.

    Args:
        text: Text to explain.
        num_samples: Forwarded to LIME's ``explain_instance``.

    Returns:
        dict: Maps each word to a list of ``{"intent", "relevance"}`` dicts
        sorted by descending relevance (LIME weight times 100). Empty when
        there are no intent names or when LIME raises ``ValueError``.
    """
    if not self.intention_names:
        return {}

    explainer = LimeTextExplainer(class_names=self.intention_names)
    labels = list(range(len(self.intention_names)))
    try:
        exp = explainer.explain_instance(text,
                                         self.parse,
                                         num_features=6,
                                         labels=labels,
                                         num_samples=num_samples)
    except ValueError:
        # Nothing could be explained: leave no labels to walk over below
        labels = []

    by_word = {}
    for label in labels:
        for word, weight in exp.as_list(label=label):
            by_word.setdefault(word, []).append({
                "intent": self.intention_names[label],
                "relevance": weight * 100
            })

    return {
        word: sorted(entries,
                     key=lambda entry: entry.get("relevance"),
                     reverse=True)
        for word, entries in by_word.items()
    }
def get_result_per_intent(self, text, num_samples):
    """Return, for each intent, the explained words and their relevance.

    Args:
        text: Text to explain.
        num_samples: Forwarded to LIME's ``explain_instance``.

    Returns:
        dict: Maps each intent name to a list sorted by descending
        ``relevance``. Word entries are ``{"word", "relevance"}`` (LIME
        weight times 100); one extra ``{"sum", "relevance": -1}`` entry per
        intent carries the sum of the raw weights.
    """
    explainer = LimeTextExplainer(class_names=self.intention_names)
    labels = list(range(len(self.intention_names)))
    exp = explainer.explain_instance(text,
                                     self.parse,
                                     num_features=6,
                                     labels=labels,
                                     num_samples=num_samples)

    collected = {intent: [] for intent in self.intention_names}
    for label in labels:
        entries = collected[self.intention_names[label]]
        total = 0
        for word, weight in exp.as_list(label=label):
            entries.append({"word": word, "relevance": weight * 100})
            total += weight
        # The fixed relevance of -1 decides where the sum entry is sorted
        entries.append({"sum": total, "relevance": -1})

    return {
        intent: sorted(entries,
                       key=lambda entry: entry.get("relevance"),
                       reverse=True)
        for intent, entries in collected.items()
    }
def __init__(self, classifier, text, one_by_one=False, tokenise=lambda txt: txt.split(), class_names=[0, 1], mask=u"[mask]", threshold=0.2, reshape_predictions=True):
    """
    Score ``text`` with ``classifier`` and derive the toxic words via LIME.

    The constructor does all the work: it scores the text, tokenises it,
    builds the ablations, runs the LIME explanation and computes the black
    list. This implementation is based on LIME.

    :param classifier: any toxicity classifier that predicts a text as toxic or not
    :param text: the textual input (sentence or document) as a string
    :param one_by_one: some classifiers may require one by one classification when scoring the "ablated" texts.
    :param tokenise: by default splits the words on whitespace; also handed to LIME as its split expression
    :param class_names: by default "toxic" is represented by 1 and "civil" by 0
    :param mask: the pseudo token to mask the toxic word (for visualisation purposes)
    :param threshold: above this value the text is predicted toxic (default 0.2)
    :param reshape_predictions: flattens the output, some classifiers may require this to be set to False
    """
    # NOTE(review): the default ``class_names`` list is a mutable default and
    # is stored as-is, so instances built with the default share one list.
    self.class_names = class_names
    self.classifier = classifier
    self.mask = mask
    self.one_by_one = one_by_one
    self.reshape_predictions = reshape_predictions
    self.text = text
    # Score of the unmodified text; computed before ``self.tokenise`` exists
    self.initial_score = self.clf_predict([text])
    self.tokenise = tokenise
    self.explainer = LimeTextExplainer(class_names=self.class_names, split_expression=tokenise)
    self.words = self.tokenise(text)
    self.ablations, self.indices = self.create_ablations()
    self.scores_decrease = self.lime_explain(self.words)
    # NOTE(review): ``threshold`` is only assigned here, after the two helper
    # calls above -- confirm neither of them reads ``self.threshold``.
    self.threshold = threshold
    self.black_list = self.get_black_list()
def explain(self, docs):
    """Generate a LIME explanation for every document in ``docs``.

    Parameters
    ----------
    docs : list of strings
        Documents LIME should be applied to.

    Returns
    -------
    exps : list
        One explanation object per input document, in input order, each
        covering the top ``self.n_classes`` labels.
    """
    lime_explainer = LimeTextExplainer()
    # NOTE: built eagerly as a list; the original author suspected problems
    # when this was produced by a generator.
    return [
        lime_explainer.explain_instance(doc,
                                        self.pipeline.predict_proba,
                                        top_labels=self.n_classes)
        for doc in docs
    ]
def go():
    """Classify the ``query`` request argument and render ``go.html``.

    The query is classified with ``model.predict``, the labels are paired
    with ``df.columns[4:]``, and LIME explains ``model.predict_proba`` for
    the top 3 labels with 10 features.

    Returns:
        The result of ``render_template('go.html', ...)``.
    """
    # User input; defaults to an empty string when the argument is absent
    query = request.args.get('query', '')

    print(
        "generating classification prediction for message {}...".format(query))
    predicted_labels = model.predict([query])[0]

    # Category columns start at position 4 of the frame
    category_columns = df.columns[4:]
    classification_results = dict(zip(category_columns, predicted_labels))

    classes = category_columns.to_list()
    print("classes = {}".format(classes))
    lime_explainer = LimeTextExplainer(class_names=classes)
    exp = lime_explainer.explain_instance(query,
                                          model.predict_proba,
                                          num_features=10,
                                          top_labels=3)

    # Rendering details live in the go.html template
    template_context = {
        'query': query,
        'exp': exp.as_html(),
        'model': model[-1],
        'classification_result': classification_results,
    }
    return render_template('go.html', **template_context)
def final_yok_classifing(sentence):
    """Explain the classifier's decision for the first element of ``sentence``.

    Args:
        sentence: Indexable whose element 0 is the text to explain.

    Returns:
        list: ``(feature, weight)`` pairs from LIME, at most 100 features.
    """
    # Labels read "not profanity" / "profanity"
    labels = ['욕설이 아님', '욕설']
    lime_explainer = LimeTextExplainer(class_names=labels)
    explanation = lime_explainer.explain_instance(
        sentence[0],
        yok_classifier_lime,
        num_features=100,
    )
    return explanation.as_list()
def explainer(args, text, num_samples: int = 20):
    """Explain the ``WrapedSenti`` classifier's top label for ``text``.

    Args:
        args: Passed to the ``WrapedSenti`` constructor.
        text: Text to explain.
        num_samples: Number of samples LIME draws.

    Returns:
        The explanation for the single top label, with up to 20 features.
    """
    predict_fn = WrapedSenti(args).predict

    def split_on_whitespace(raw):
        """Tokenise the way LIME is asked to: plain whitespace split."""
        return raw.split()

    lime_explainer = LimeTextExplainer(
        class_names=["neutral", "positive", "negative"],
        split_expression=split_on_whitespace,
        # The classifier uses bigrams / contextual ordering, so word order
        # matters and bag-of-words perturbation cannot be used.
        bow=False,
    )

    return lime_explainer.explain_instance(
        text,
        classifier_fn=predict_fn,
        top_labels=1,
        num_features=20,
        num_samples=num_samples,
    )
def explain_prediction(sent, file_name):
    """Explain ``spacy_prediction`` for ``sent`` and save the result as HTML.

    Args:
        sent: Text to explain.
        file_name: Forwarded to ``get_categories`` to obtain the class names.

    Returns:
        Whatever ``save_to_file`` returns for the file
        ``<DIRECTORY_PATH>explanation.html``.
    """
    class_names = get_categories(sent, file_name)
    lime_explainer = LimeTextExplainer(class_names=class_names)
    explanation = lime_explainer.explain_instance(sent,
                                                  spacy_prediction,
                                                  labels=[0, 1])
    target_path = r'{}explanation.html'.format(DIRECTORY_PATH)
    return explanation.save_to_file(target_path)
def __init__(self):
    """Load the pickled model and prepare a LIME text explainer.

    The model is read from ``models/rf.pkl`` (relative to the working
    directory) and the explainer is configured with the five class names.

    Raises:
        OSError: If the model file cannot be opened.
    """
    # The context manager closes the file handle even if unpickling fails
    with open("models/rf.pkl", 'rb') as model_file:
        self.model = pickle.load(model_file)
    self.class_names = [
        'negative',
        'somewhat negative',
        'neutral',
        'somewhat positive',
        'positive',
    ]
    self.explainer = LimeTextExplainer(class_names=self.class_names)
def classify_lime(model, dataset, train_dataset, config_dict):
    """Attach per-word LIME weights for label 1 to every example.

    Args:
        model: Forwarded to ``batch_predict``.
        dataset: Dataset whose ``examples[i].words`` are explained; each
            example receives a ``predictions`` list.
        train_dataset: Forwarded to ``batch_predict`` as ``dataset``.
        config_dict: Reads ``per_device_eval_batch_size``,
            ``lime_num_samples`` and, optionally,
            ``lime_mask_string_use_pad``.

    Returns:
        The same ``dataset`` object, with ``predictions`` filled in.
    """
    # Masked words are replaced by the tokenizer's pad token when
    # configured, otherwise by its mask token.
    if config_dict.get("lime_mask_string_use_pad", False):
        mask_string = tokenizer.pad_token
    else:
        mask_string = tokenizer.mask_token

    lime_explainer = LimeTextExplainer(
        class_names=(0, 1),
        # try with True as well: False causes masking to be done,
        # True means removing words
        bow=False,
        mask_string=mask_string,
        feature_selection="none",  # use all features
        split_expression=r"\s",
    )
    predict_fn = partial(
        batch_predict,
        model=model,
        dataset=train_dataset,
        batch_size=config_dict["per_device_eval_batch_size"],
        method="lime",
    )

    total = len(dataset)
    for position in range(total):
        if position % 50 == 0:
            logger.info("lime_sample_idx:" + str(position) + "/" + str(total))
        example = dataset.examples[position]
        explanation = lime_explainer.explain_instance(
            " ".join(example.words),
            predict_fn,
            labels=(1, ),
            num_samples=config_dict["lime_num_samples"],
        )
        # Order the (feature id, weight) pairs by feature id, keep the weights
        ordered = sorted(explanation.as_map()[1], key=lambda pair: pair[0])
        example.predictions = [pair[1] for pair in ordered]
    return dataset
def explainer(method: str, path_to_file: str, text: str, lowercase: bool, num_samples: int) -> LimeTextExplainer:
    """Explain the top label predicted for ``text`` by the chosen classifier.

    Args:
        method: Passed to ``explainer_class`` to select the classifier.
        path_to_file: Passed to ``explainer_class``.
        text: Text to explain.
        lowercase: Lower-case the text first (needed by some classifiers).
        num_samples: Number of samples LIME draws.

    Returns:
        The object returned by ``explain_instance``.
        NOTE(review): the annotation names ``LimeTextExplainer`` while the
        value is the explanation -- confirm and correct the annotation.
    """
    predict_fn = explainer_class(method, path_to_file).predict

    if lowercase:
        text = text.lower()

    def split_on_whitespace(raw):
        """Tokenise the way LIME is asked to: plain whitespace split."""
        return raw.split()

    lime_explainer = LimeTextExplainer(
        class_names=[1, 2, 3, 4, 5],
        split_expression=split_on_whitespace,
        # The classifier uses trigrams / contextual ordering, so word order
        # matters and bag-of-words perturbation cannot be used.
        bow=False,
    )

    return lime_explainer.explain_instance(
        text,
        classifier_fn=predict_fn,
        top_labels=1,
        num_features=20,
        num_samples=num_samples,
    )
def __init__(self, class_names, investigate_labels, prediction_fn, tokenize_fn=tokenize_for_lime, num_features=20, num_samples=20, use_top_labels=True, lower_case_bool=False):
    """Store the LIME settings and create the text explainer.

    Args:
        class_names: Class names handed to ``LimeTextExplainer``.
        investigate_labels: Labels to investigate; stored as given.
        prediction_fn: Prediction callable; stored as given.
        tokenize_fn: Tokenizer used as LIME's split expression. When falsy,
            the explainer is built with its default splitting.
        num_features: Stored as given.
        num_samples: Stored as given.
        use_top_labels: Stored as given.
        lower_case_bool: Forwarded to the parent initialiser.
    """
    self.class_names = class_names
    self.__investigate_labels_lst = investigate_labels
    self.__num_features = num_features
    self.__num_samples = num_samples
    self.__use_top_labels_bool = use_top_labels
    self.__prediction_fn = prediction_fn

    # The fixed random state is only applied together with a custom tokenizer
    explainer_options = {"class_names": class_names}
    if tokenize_fn:
        explainer_options["split_expression"] = tokenize_fn
        explainer_options["random_state"] = 0
    self.__explainer = LimeTextExplainer(**explainer_options)

    super().__init__(tokenize_fn=tokenize_fn,
                     lower_case_bool=lower_case_bool)
def limevisual(pData, pDesc, Idx, pClassNames, pAccountName, pVec, nNumFeatures, nTopLabels, tLabels, pRootDir):
    """Explain one row's text with LIME, show it and save it as HTML.

    The model for the row's ``Intent`` is loaded with ``loadmodel``, chained
    after ``pVec``, and explained on ``pData[pDesc][Idx]``. The explanation
    is shown in the notebook and written to a fixed ``demo.html`` path.

    Args:
        pData: Table-like object with an ``Intent`` column and ``pDesc``.
        pDesc: Name of the text column.
        Idx: Row position; converted with ``int``.
        pClassNames: Class names for the explainer.
        pAccountName: Forwarded to ``loadmodel``.
        pVec: Vectorizer placed in front of the loaded model.
        nNumFeatures: Number of features; converted with ``int``.
        nTopLabels: Number of top labels; converted with ``int``.
        tLabels: Labels passed to ``show_in_notebook``.
        pRootDir: Forwarded to ``loadmodel``.

    Returns:
        int: 0 on success, -1 when any exception was raised (it is printed).
    """
    try:
        intent_column = pData['Intent']
        row = int(Idx)
        pIntent = intent_column[row]
        _, pModels = loadmodel(pRootDir, pAccountName, pIntent)
        pPipeModel = make_pipeline(pVec, pModels)

        # Words of two or more word characters
        word_pattern = re.compile(r"(?u)\b\w\w+\b")
        pExplainer = LimeTextExplainer(class_names=pClassNames,
                                       split_expression=word_pattern.findall)

        text = pData[pDesc][row]
        pExplainText = pExplainer.explain_instance(
            text,
            classifier_fn=pPipeModel.predict_proba,
            num_features=int(nNumFeatures),
            top_labels=int(nTopLabels))
        pExplainText.show_in_notebook(text=text, labels=tLabels)
        # NOTE(review): the output location is hard-coded and ignores pRootDir
        pExplainText.save_to_file(
            'C:\\Users\\tamohant\\Desktop\\Auto_synthesis_Training_data\\AutoSynthesisLite\\demo.html',
            labels=None,
            predict_proba=True,
            show_predicted_value=True)
    except Exception as e:
        print(
            '*** ERROR[001]: Error in visualiation file of Limevisual function: ',
            sys.exc_info()[0], str(e))
        print(traceback.format_exc())
        return (-1)
    else:
        return (0)
def test_lime_text_explainer_bad_regressor(self):
    """``explain_instance`` must raise TypeError for a ``Lasso`` regressor."""
    train = fetch_20newsgroups(subset='train')
    test = fetch_20newsgroups(subset='test')

    # Shorten the class names: last dotted component, or the last two for
    # the 'misc' groups.
    class_names = []
    for target_name in train.target_names:
        parts = target_name.split('.')
        if 'misc' in target_name:
            class_names.append('.'.join(parts[-2:]))
        else:
            class_names.append(parts[-1])
    class_names[3] = 'pc.hardware'
    class_names[4] = 'mac.hardware'

    vectorizer = TfidfVectorizer(lowercase=False)
    train_vectors = vectorizer.fit_transform(train.data)
    test_vectors = vectorizer.transform(test.data)

    nb = MultinomialNB(alpha=.01)
    nb.fit(train_vectors, train.target)
    pred = nb.predict(test_vectors)
    f1_score(test.target, pred, average='weighted')

    pipeline = make_pipeline(vectorizer, nb)
    lime_explainer = LimeTextExplainer(class_names=class_names)
    idx = 1340

    with self.assertRaises(TypeError):
        exp = lime_explainer.explain_instance(  # noqa:F841
            test.data[idx],
            pipeline.predict_proba,
            num_features=6,
            labels=[0, 17],
            model_regressor=Lasso())
def explain(self, docs):
    """Run LIME on each document and collect the explanations.

    Parameters
    ----------
    docs : list of strings
        The documents to explain.

    Returns
    -------
    exps : list
        Explanation objects in the order of ``docs``; each one covers the
        top ``self.n_classes`` labels of ``self.pipeline.predict_proba``.
    """
    lime_explainer = LimeTextExplainer()

    def explain_one(doc):
        """Explain a single document with the shared explainer."""
        return lime_explainer.explain_instance(doc,
                                               self.pipeline.predict_proba,
                                               top_labels=self.n_classes)

    # NOTE: materialised as a list on purpose; the original author suspected
    # trouble when this ran inside a generator.
    return [explain_one(doc) for doc in docs]
def prediction(txt, sentiment, logistic, num_features):
    """Write the LIME and logistic-regression explanations of ``txt`` to disk.

    Three files are produced under ``static/outputs``: ``output.html`` (LIME
    explanation), ``lime_explanation_graph.png`` (LIME chart for label 1)
    and ``log_explanation_graph.png`` (coefficients of the words of ``txt``
    that are known to the vectorizer; green when positive, red otherwise).

    Args:
        txt: Text to explain.
        sentiment: Object exposing the fitted vectorizer as ``tfidf_vect``.
        logistic: Fitted model exposing ``predict_proba`` and ``coef_``.
        num_features: Number of features in the LIME explanation.
    """
    # --- LIME ---
    vectorizer = sentiment.tfidf_vect
    pipeline = make_pipeline(vectorizer, logistic)
    explainer = LimeTextExplainer(class_names=['NEGATIVE', 'POSITIVE'])
    exp = explainer.explain_instance(txt,
                                     pipeline.predict_proba,
                                     num_features=num_features)
    exp.save_to_file("static/outputs/output.html")
    lime_fig = exp.as_pyplot_figure(label=1)
    lime_fig.savefig('static/outputs/lime_explanation_graph.png')
    # Close the figure: pandas would otherwise draw the coefficient bars on
    # top of the LIME chart, because it reuses the current axes.
    plt.close(lime_fig)

    # --- Logistic regression coefficients ---
    # Feature names are loop-invariant: fetch them once and index them.
    # Newer scikit-learn only offers get_feature_names_out.
    if hasattr(vectorizer, "get_feature_names_out"):
        feature_names = vectorizer.get_feature_names_out()
    else:
        feature_names = vectorizer.get_feature_names()
    feature_position = {name: pos for pos, name in enumerate(feature_names)}
    coefs = logistic.coef_[0]

    words_with_weights = {}
    for word in re.sub(r"[^\w]", " ", txt).split():
        position = feature_position.get(word)
        if position is not None:
            words_with_weights[word] = coefs[position]

    coef_fig, coef_ax = plt.subplots()
    # With no known word there is nothing to plot; still write the (empty)
    # chart so a stale image from an earlier call is not left behind.
    if words_with_weights:
        weights = pd.Series(words_with_weights)
        colors = ['g' if weight > 0 else 'r' for weight in weights]
        weights.plot(kind='barh', ax=coef_ax, color=colors)
    coef_fig.savefig('static/outputs/log_explanation_graph.png')
    plt.close(coef_fig)
def __init__(self, class_names, count_vectorizer):
    """Keep the class names and vectorizer and create the LIME explainer.

    The explainer (https://arxiv.org/abs/1602.04938) is used to explain
    selected examples, i.e. to check that the trained model picks a label
    for meaningful reasons (words/features).

    Args:
        class_names: Class names handed to ``LimeTextExplainer``.
        count_vectorizer: Vectorizer stored for later use.
    """
    self.__count_vectorizer = count_vectorizer
    self.__class_name = class_names
    self.__explainer = LimeTextExplainer(class_names=class_names)
def lime_explanation(classifier, data, features=MAX_FEATURES):
    """Explain ``classifier.predict_proba`` on ``data`` with LIME.

    Args:
        classifier: Object exposing ``predict_proba``.
        data: Text instance to explain.
        features: Number of features in the explanation.

    Returns:
        The explanation produced by ``explain_instance``.
    """
    lime_explainer = LimeTextExplainer(class_names=CLASS_NAMES)
    call_options = {
        "text_instance": data,
        "classifier_fn": classifier.predict_proba,
        "num_features": features,
    }
    return lime_explainer.explain_instance(**call_options)
def set_parameters(self, **kwargs):
    """Rebuild ``self.explainer`` with the given LimeTextExplainer options.

    # Arguments
        **kwargs: Options passed to ``LimeTextExplainer``. ``class_names``
            defaults to ``self.class_names`` when not supplied. For more
            detail, please check
            https://lime-ml.readthedocs.io/en/latest/index.html.
    """
    options = {"class_names": self.class_names}
    # Caller-supplied values, including class_names, take precedence
    options.update(kwargs)
    self.explainer = LimeTextExplainer(**options)
def limer(example):
    """Show a LIME explanation of the top label for ``example`` in a notebook.

    Args:
        example: Raw input; passed through ``spacing_example`` before LIME.
    """
    # TODO: return the explanation as HTML; find an HTML API instead of the
    # notebook-only one.

    def predict_as_numpy(samples):
        """Run inference and convert the detached result to a numpy array."""
        return do_inference(samples, True).detach().numpy()

    lime_explainer = LimeTextExplainer()
    explanation = lime_explainer.explain_instance(spacing_example(example),
                                                  predict_as_numpy,
                                                  top_labels=1)
    explanation.show_in_notebook()
def explain_instance(headline, body):
    """Explain ``classifier_fn`` on the combined headline and body.

    Args:
        headline: Headline text.
        body: Body text.

    Returns:
        The explanation produced by LIME.
    """
    text = combine_sentence(headline, body)
    lime_explainer = LimeTextExplainer(class_names=CLASS_NAMES)
    # NOTE(review): both ``labels`` and ``top_labels`` are passed, and only
    # 4 samples are drawn -- confirm both are intended.
    call_options = {
        "labels": [0, 1, 2, 3],
        "top_labels": 4,
        "num_samples": 4,
    }
    return lime_explainer.explain_instance(text, classifier_fn, **call_options)
def __init__(self, class_names):
    """Create the LIME explainer and set the default explanation settings.

    Args:
        class_names: Class names handed to ``LimeTextExplainer``; every
            class index is put into ``investigate_labels``.
    """
    self.class_names = class_names
    self.explainer = LimeTextExplainer(class_names=class_names)

    # Defaults: 20 features, 20 samples, explain the top 2 labels
    self.num_features = 20
    self.num_samples = 20
    self.use_top_labels, self.top_labels = True, 2

    label_count = len(class_names)
    self.investigate_labels = list(range(label_count))
def explain(self, text, nwords, return_weights=False):
    '''
    Use `LimeTextExplainer` to obtain the top `nwords` most important/polar
    words in the `text` as an explanation.

    Parameters
    --------------
    text: str
        The text to explain; it is preprocessed before being explained.
    nwords: int
        The number of most important words to return (i.e. explanation size).
    return_weights: bool
        Set to True to also return the weights assigned by LIME.

    Returns
    ---------------
    word_ranking : array
        Indexes of the `nwords` top-ranked words in the text.
    ranked_words: list
        List of `nwords` top-ranked words in the text.
    weights: dict, optional
        Ordered mapping (word position -> weight) for label 1, sorted by
        decreasing absolute weight.
    explanation: optional
        The explanation object returned by `LimeTextExplainer`.
    '''
    text = preprocess_text(text)
    text_words = get_tokens(text)

    # bow is set to False because word order is important
    lime_explainer = LimeTextExplainer(class_names=['negative', 'positive'],
                                       feature_selection='auto',
                                       bow=False,
                                       split_expression=' ',
                                       verbose=False)
    explanation = lime_explainer.explain_instance(
        text_instance=text,
        labels=[0, 1],
        classifier_fn=self.predict_texts,
        num_features=nwords,
        num_samples=self.nsamples)

    def by_decreasing_magnitude(pair):
        """Sort key: larger absolute weights come first."""
        return -abs(pair[1])

    label_one_pairs = explanation.as_map()[1]
    weights = OrderedDict(sorted(label_one_pairs, key=by_decreasing_magnitude))
    word_ranking = np.array(list(weights.keys()))
    ranked_words = [text_words[position] for position in word_ranking]

    result = (word_ranking, ranked_words)
    if return_weights:
        result = result + (weights, explanation)
    return result
def model_load_and_explain(x_text_input):
    """Explain the CNN's prediction for a text and save the result as HTML.

    The two highest-ranked LIME features are replaced by ``<unk>`` in the
    text and the prediction is printed before and after the replacement.

    Args:
        x_text_input: Text to explain, or ``'default'`` to use the built-in
            sample review.

    Returns:
        str: File name (``oi_<timestamp>.html``) of the explanation written
        to ``static/oi_lime`` below the current directory.
    """
    from lime import lime_text
    print(max_document_length)

    default_text = '" extreme ops " exceeds expectations . good fun , good action , good acting , good dialogue , good pace , good cinematography .'
    x_text_instance = default_text if x_text_input == 'default' else x_text_input

    # One prediction up front, on a single-element batch; result is not used
    cnn.predict_text_instance([x_text_instance])

    from lime.lime_text import LimeTextExplainer
    lime_explainer = LimeTextExplainer(class_names=['Negative', 'Positive'])
    exp = lime_explainer.explain_instance(x_text_instance,
                                          cnn.predict_text_instance,
                                          num_features=6)
    ranked = exp.as_list()

    print("")
    print("output prob (Negative, Positive)")
    print('Original prediction:',
          cnn.predict_text_instance([x_text_instance])[0])
    print("")

    # Replace the two top-ranked features by the unknown-word token
    first_feature = ranked[0][0]
    second_feature = ranked[1][0]
    x_text_removed = x_text_instance.replace(first_feature, '<unk>')
    x_text_removed = x_text_removed.replace(second_feature, '<unk>')

    print("x_text_instance: ", x_text_instance)
    print("")
    print("x_text_removed: ", x_text_removed)
    print(first_feature)
    print(second_feature)
    print("")
    print('Prediction removing some features:',
          cnn.predict_text_instance([x_text_removed])[0])
    print(
        'Difference:',
        cnn.predict_text_instance([x_text_instance])[0] -
        cnn.predict_text_instance([x_text_removed])[0])

    file_name = 'oi_' + str(int(time.time())) + '.html'
    static_dir = os.path.abspath(os.path.join(os.curdir, 'static'))
    oi_lime_dir = os.path.abspath(os.path.join(static_dir, 'oi_lime'))
    exp.save_to_file(os.path.abspath(os.path.join(oi_lime_dir, file_name)))
    return file_name
def explain(clf, X_train, y, instance, name, method):
    """Fit ``clf`` and explain ``method`` on ``instance`` with LIME.

    The explanation of the top label (10 features) is shown in the notebook
    and saved as ``../<name>_explanation.html``.

    Args:
        clf: Estimator fitted on ``X_train`` / ``y``.
        X_train: Training inputs.
        y: Training targets.
        instance: Text instance to explain.
        name: Prefix of the output file name.
        method: Prediction callable handed to LIME.
    """
    clf.fit(X_train, y)
    explanation = LimeTextExplainer(class_names=[-1, 0, 1]).explain_instance(
        instance,
        method,
        top_labels=1,
        num_features=10,
    )
    explanation.show_in_notebook()
    explanation.save_to_file(f"../{name}_explanation.html")
def interpret_data(X, y, func, class_names):
    """Time LIME on the first 10 documents of ``newsgroups_test``.

    Args:
        X: Not used by this function.
        y: Not used by this function.
        func: Prediction callable handed to LIME.
        class_names: Class names for the explainer.

    Returns:
        tuple[list, list]: Seconds spent per explanation and the ``score``
        of each explanation, in document order.
    """
    lime_explainer = LimeTextExplainer(class_names=class_names)
    times = []
    scores = []
    for doc_position in range(10):
        document = newsgroups_test.data[doc_position]
        started = time.time()
        explanation = lime_explainer.explain_instance(document,
                                                      func,
                                                      num_features=6)
        elapsed = time.time() - started
        times.append(elapsed)
        scores.append(explanation.score)
        # Progress marker, one per explained document
        print('...')
    return times, scores
def explain_text(self, labels, instance, column_name=None, num_features=10, num_samples=5000):
    """Explain a text field of a prediction.

    Runs LIME on one text column of ``instance`` and reports which words
    contribute most to the given labels.

    Args:
      labels: a list of labels to explain.
      instance: the prediction instance. It needs to conform to model's input.
          Can be a csv line string, or a dict.
      column_name: which text column to explain. Can be None if there is only
          one text column in the model input.
      num_features: maximum number of words (features) to analyze. Passed to
          LIME LimeTextExplainer directly.
      num_samples: size of the neighborhood to learn the linear model. Passed
          to LIME LimeTextExplainer directly.

    Returns:
      A LIME's lime.explanation.Explanation.

    Throws:
      ValueError if the given text column is not found in model input or
          column_name is None but there are multiple text columns in model
          input.
    """
    from lime.lime_text import LimeTextExplainer

    if not column_name and len(self._text_columns) > 1:
        raise ValueError('There are multiple text columns in the input of the model. ' +
                         'Please specify "column_name".')
    if column_name and column_name not in self._text_columns:
        raise ValueError('Specified column_name "%s" not found in the model input.'
                         % column_name)

    if column_name:
        text_column_name = column_name
    else:
        text_column_name = self._text_columns[0]

    # A csv line is parsed into a dict keyed by the model's headers
    if isinstance(instance, six.string_types):
        reader = csv.DictReader([instance], fieldnames=self._headers)
        instance = next(reader)

    predict_fn = self._make_text_predict_fn(labels, instance, text_column_name)
    lime_explainer = LimeTextExplainer(class_names=labels)
    return lime_explainer.explain_instance(
        instance[text_column_name],
        predict_fn,
        labels=range(len(labels)),
        num_features=num_features,
        num_samples=num_samples)
def test_lime_text_explainer_good_regressor(self):
    """``explain_instance`` must accept ``LinearRegression`` as regressor."""
    from sklearn.datasets import fetch_20newsgroups
    train = fetch_20newsgroups(subset='train')
    test = fetch_20newsgroups(subset='test')

    # Shorten the class names: last dotted component, or the last two for
    # the 'misc' groups.
    class_names = []
    for target_name in train.target_names:
        parts = target_name.split('.')
        if 'misc' in target_name:
            class_names.append('.'.join(parts[-2:]))
        else:
            class_names.append(parts[-1])
    class_names[3] = 'pc.hardware'
    class_names[4] = 'mac.hardware'

    vectorizer = TfidfVectorizer(lowercase=False)
    train_vectors = vectorizer.fit_transform(train.data)
    test_vectors = vectorizer.transform(test.data)

    nb = MultinomialNB(alpha=.01)
    nb.fit(train_vectors, train.target)
    pred = nb.predict(test_vectors)
    f1_score(test.target, pred, average='weighted')

    pipeline = make_pipeline(vectorizer, nb)
    lime_explainer = LimeTextExplainer(class_names=class_names)
    idx = 1340
    # The test passes as long as this call does not raise
    exp = lime_explainer.explain_instance(test.data[idx],
                                          pipeline.predict_proba,
                                          num_features=6,
                                          labels=[0, 17],
                                          model_regressor=LinearRegression())
def test_lime_text_explainer_good_regressor(self):
    """A default explanation exists and has exactly six features."""
    categories = ['alt.atheism', 'soc.religion.christian']
    train = fetch_20newsgroups(subset='train', categories=categories)
    test = fetch_20newsgroups(subset='test', categories=categories)
    class_names = ['atheism', 'christian']

    vectorizer = TfidfVectorizer(lowercase=False)
    train_vectors = vectorizer.fit_transform(train.data)
    test_vectors = vectorizer.transform(test.data)

    nb = MultinomialNB(alpha=.01)
    nb.fit(train_vectors, train.target)
    pred = nb.predict(test_vectors)
    f1_score(test.target, pred, average='weighted')

    pipeline = make_pipeline(vectorizer, nb)
    lime_explainer = LimeTextExplainer(class_names=class_names)
    idx = 83
    exp = lime_explainer.explain_instance(test.data[idx],
                                          pipeline.predict_proba,
                                          num_features=6)

    self.assertIsNotNone(exp)
    feature_count = len(exp.as_list())
    self.assertEqual(6, feature_count)
# Script: train a random forest on two 20-newsgroups categories and explain
# one test document with LIME.
import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.datasets import fetch_20newsgroups
import matplotlib.pyplot as plt
from lime.lime_text import LimeTextExplainer

# Two-category subset of the dataset, with short display names
categories = ['alt.atheism', 'soc.religion.christian']
newsgroups_train = fetch_20newsgroups(subset='train', categories=categories)
newsgroups_test = fetch_20newsgroups(subset='test', categories=categories)
class_names = ['atheism', 'christian']

# NOTE(review): ``sklearn.feature_extraction.text`` is not imported
# explicitly here -- presumably it is loaded as a side effect of the other
# sklearn imports; confirm.
vectorizer = sklearn.feature_extraction.text.TfidfVectorizer(lowercase=False)
train_vectors = vectorizer.fit_transform(newsgroups_train.data)
rf = RandomForestClassifier(n_estimators=500)
rf.fit(train_vectors, newsgroups_train.target)

# LIME hands over raw text, so the vectorizer and the model are chained
c = make_pipeline(vectorizer, rf)
explainer = LimeTextExplainer(class_names=class_names)

# Explain test document 81 with 10 features
idx = 81
exp = explainer.explain_instance(newsgroups_test.data[idx], c.predict_proba, num_features=10)
print('Document id: %d' % idx)
# Column 1 of predict_proba is reported as the 'christian' probability
print('Probability(christian) =', c.predict_proba([newsgroups_test.data[idx]])[0,1])
print('True class: %s' % class_names[newsgroups_test.target[idx]])

# Show the explanation as a matplotlib figure
fig = exp.as_pyplot_figure()
plt.show()
def TextInterpret(text, predict):
    """Explain ``predict`` on ``text`` with LIME and show it in a notebook.

    Args:
        text: Text instance to explain.
        predict: Prediction callable handed to LIME.

    Returns:
        The explanation produced by ``explain_instance``.
    """
    explanation = LimeTextExplainer().explain_instance(text, predict)
    explanation.show_in_notebook()
    return explanation
# In[6]: from lime import lime_text from sklearn.pipeline import make_pipeline c = make_pipeline(vectorizer, rf) # In[7]: print(c.predict_proba([newsgroups_test.data[0]])) # In[63]: from lime.lime_text import LimeTextExplainer explainer = LimeTextExplainer(class_names=class_names) # In[64]: idx = 83 exp = explainer.explain_instance(newsgroups_test.data[idx], c.predict_proba, num_features=10) print("value to be predicted") print(newsgroups_test.data[idx]) print(newsgroups_test.target[idx]) print(newsgroups_test.target_names) # In[17]: print('Document id: %d' % idx)