Example No. 1
    def __init__(self,
                 class_names,
                 investigate_labels,
                 prediction_fn,
                 tokenize_fn=tokenize_for_lime,
                 num_features=20,
                 num_samples=20,
                 use_top_labels=True,
                 lower_case_bool=False):
        self.class_names = class_names

        self.__investigate_labels_lst = investigate_labels
        self.__num_features = num_features
        self.__num_samples = num_samples
        self.__use_top_labels_bool = use_top_labels

        self.__prediction_fn = prediction_fn

        if tokenize_fn:
            self.__explainer = LimeTextExplainer(class_names=class_names,
                                                 split_expression=tokenize_fn,
                                                 random_state=0)
        else:
            self.__explainer = LimeTextExplainer(class_names=class_names)
        super().__init__(tokenize_fn=tokenize_fn,
                         lower_case_bool=lower_case_bool)
Example No. 2
    def test_lime_text_tabular_not_equal_random_state(self):
        categories = ['alt.atheism', 'soc.religion.christian']
        newsgroups_train = fetch_20newsgroups(subset='train',
                                              categories=categories)
        newsgroups_test = fetch_20newsgroups(subset='test',
                                             categories=categories)
        class_names = ['atheism', 'christian']
        vectorizer = TfidfVectorizer(lowercase=False)
        train_vectors = vectorizer.fit_transform(newsgroups_train.data)
        test_vectors = vectorizer.transform(newsgroups_test.data)
        nb = MultinomialNB(alpha=.01)
        nb.fit(train_vectors, newsgroups_train.target)
        pred = nb.predict(test_vectors)
        f1_score(newsgroups_test.target, pred, average='weighted')
        c = make_pipeline(vectorizer, nb)

        explainer = LimeTextExplainer(class_names=class_names, random_state=10)
        exp_1 = explainer.explain_instance(newsgroups_test.data[83],
                                           c.predict_proba,
                                           num_features=6)

        explainer = LimeTextExplainer(class_names=class_names, random_state=20)
        exp_2 = explainer.explain_instance(newsgroups_test.data[83],
                                           c.predict_proba,
                                           num_features=6)

        self.assertFalse(exp_1.as_map() == exp_2.as_map())
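A complementary check, mirroring the test above: with identical random_state values the two explanations should agree. A minimal sketch reusing the same fixtures (newsgroups_test, c, class_names):

explainer = LimeTextExplainer(class_names=class_names, random_state=10)
exp_1 = explainer.explain_instance(newsgroups_test.data[83],
                                   c.predict_proba, num_features=6)

explainer = LimeTextExplainer(class_names=class_names, random_state=10)
exp_2 = explainer.explain_instance(newsgroups_test.data[83],
                                   c.predict_proba, num_features=6)

assert exp_1.as_map() == exp_2.as_map()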
Example No. 3
def finalExplain_n(codes):
    resData = []
    r = Rake()
    classNames = ['negative', 'positive']
    exp = LimeTextExplainer(class_names=classNames)
    for j, code in enumerate(codes):
        tmpResult = {}
        c = translate(code)
        com = ''
        for i in range(1, len(c)):
            if c[i] == '</s>':
                break
            com += c[i] + ' '
        tmpResult['code'] = code
        tmpResult['comment'] = com
        r.extract_keywords_from_text(com)
        comKeys = r.get_ranked_phrases()
        tmpResult['commentKeywords'] = comKeys

        tmpList = []
        for _key in comKeys:
            # predictorLime presumably reads this module-level `key`
            global key
            key = _key
            tmpExp = {
                'commentKeyword': key,
            }
            explanation = exp.explain_instance(code,
                                               predictorLime,
                                               num_features=6)
            print(explanation.as_list())
            tmpExp['lime'] = explanation.as_list()  # call it; storing the bound method keeps no data
            tmpList.append(tmpExp)
        tmpResult['explanations'] = tmpList
        resData.append(tmpResult)
    return resData
Example No. 4
def prediction(txt, sentiment, logistic, num_features):
    ##LIME
    c = make_pipeline(sentiment.tfidf_vect, logistic)
    class_names = ['NEGATIVE', 'POSITIVE']
    explainer = LimeTextExplainer(class_names=class_names)
    exp = explainer.explain_instance(txt,
                                     c.predict_proba,
                                     num_features=num_features)
    output = "static/outputs/output.html"
    exp.save_to_file(output)
    exp.as_pyplot_figure(label=1)
    plt.savefig('static/outputs/lime_explanation_graph.png')

    # LOGISTIC REGRESSION
    list_of_words = re.sub(r"[^\w]", " ", txt).split()
    words_with_weights = {}
    feats = sentiment.tfidf_vect.get_feature_names()  # hoisted out of the loop
    coefs = logistic.coef_[0]
    for word in list_of_words:
        if word in feats:
            ind = feats.index(word)
            words_with_weights[word] = coefs[ind]

    data = pd.DataFrame.from_dict(words_with_weights, orient='index')
    data[0].plot(kind='barh', color=(data[0] > 0).map({True: 'g', False: 'r'}))
    plt.savefig('static/outputs/log_explanation_graph.png')
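A side note on the loop above: feats.index(word) is a linear scan for every word. A fitted scikit-learn vectorizer exposes a vocabulary_ dict mapping each term to its column index, so the same weights can be collected in constant time per word. A minimal sketch, assuming the same fitted sentiment.tfidf_vect and logistic objects:

import re

def words_to_weights(txt, tfidf_vect, logistic):
    vocab = tfidf_vect.vocabulary_  # term -> column index in the tf-idf matrix
    coefs = logistic.coef_[0]
    tokens = re.sub(r"[^\w]", " ", txt).split()
    return {w: coefs[vocab[w]] for w in tokens if w in vocab}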
Example No. 5
def go():
    # save user input in query
    query = request.args.get('query', '')
    #query = request.form['query']

    # use model to predict classification for query
    print(
        "generating classification prediction for message {}...".format(query))
    classification_labels = model.predict([query])[0]
    classification_results = dict(zip(df.columns[4:], classification_labels))

    # set-up Lime
    classes = df.columns[4:].to_list()
    print("classes = {}".format(classes))
    limeexplainer = LimeTextExplainer(class_names=classes)
    exp = limeexplainer.explain_instance(query,
                                         model.predict_proba,
                                         num_features=10,
                                         top_labels=3)

    # This will render the go.html Please see that file.
    return render_template('go.html',
                           query=query,
                           exp=exp.as_html(),
                           model=model[-1],
                           classification_result=classification_results)
Example No. 6
 def __init__(self,
              classifier,
              text,
              one_by_one=False,
              tokenise=lambda txt: txt.split(),
              class_names=[0, 1],
              mask=u"[mask]",
              threshold=0.2,
              reshape_predictions=True):
     """
     Given a classifier and a tokenisation method LimeUsd returns the toxic words and the respective offsets.
     This implementation is based on LIME.
     :param classifier: any toxicity classifier that predicts a text as toxic or not
     :param text: the textual input (sentence or document) as a string
     :param one_by_one: some classifiers may require one-by-one classification when scoring the "ablated" texts.
     :param tokenise: by default splits the words on whitespace -- same as LIME
     :param class_names: by default "toxic" is represented by 1 and "civil" by 0
     :param mask: the pseudo token to mask the toxic word (for visualisation purposes)
     :param threshold: above this value the text is predicted toxic (default 0.2)
     :param reshape_predictions: flattens the output; some classifiers may require this to be set to False
     """
     self.class_names = class_names
     self.classifier = classifier
     self.mask = mask
     self.one_by_one = one_by_one
     self.reshape_predictions = reshape_predictions
     self.text = text
     self.initial_score = self.clf_predict([text])
     self.tokenise = tokenise
     self.explainer = LimeTextExplainer(class_names=self.class_names, split_expression=tokenise)
     self.words = self.tokenise(text)
     self.ablations, self.indices = self.create_ablations()
     self.scores_decrease = self.lime_explain(self.words)
     self.threshold = threshold
     self.black_list = self.get_black_list()
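For context on the classifier argument: LimeTextExplainer ultimately calls it with a list of perturbed strings and expects an array of per-class probabilities of shape (n_texts, n_classes). A minimal sketch of a compatible wrapper, where toxicity_score is a hypothetical scorer for a single string (not part of the snippet):

import numpy as np

def clf_predict(texts):
    # score each perturbed text; toxicity_score is a stand-in for any [0, 1] scorer
    scores = np.array([toxicity_score(t) for t in texts])
    # one probability column per class, ordered as class_names: [civil, toxic]
    return np.column_stack([1.0 - scores, scores])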
Example No. 7
    def explain(self, docs):
        """Generate LIME Explanations for list of docs.

        Takes as input a list of strings that make up the documents where LIME
        should be applied to. Returns Explanation class instances.

        Parameters
        ----------
        docs : list of strings
            List of input documents.

        Returns
        -------
        exps : list of classes
            For each input document, an Explanation object on which, for
            example, the .as_list, .show_in_notebook etc. methods can be
            called.
        """
        explainer = LimeTextExplainer()
        experiments = []

        for doc in docs:  # NOTE: this might misbehave if docs is a generator
            experiment = explainer.explain_instance(
                doc, self.pipeline.predict_proba, top_labels=self.n_classes)
            experiments.append(experiment)

        return experiments
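A brief usage sketch, assuming an instance expl of this class with a fitted self.pipeline:

exps = expl.explain(["first document text", "second document text"])
for exp in exps:
    # (word, weight) pairs for the highest-scoring label
    print(exp.as_list(label=exp.available_labels()[0]))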
Example No. 8
def classify_lime(model, dataset, train_dataset, config_dict):
    explainer = LimeTextExplainer(
        class_names=(0, 1),
        # bow=False masks words; bow=True removes them instead (try both)
        bow=False,
        mask_string=(tokenizer.pad_token
                     if config_dict.get("lime_mask_string_use_pad", False)
                     else tokenizer.mask_token),
        feature_selection="none",  # use all features
        split_expression=r"\s",
    )
    classify_sentence_partial = partial(
        batch_predict,
        model=model,
        dataset=train_dataset,
        batch_size=config_dict["per_device_eval_batch_size"],
        method="lime",
    )

    res_list = []
    for i in range(0, len(dataset)):
        if i % 50 == 0:
            logger.info("lime_sample_idx:" + str(i) + "/" + str(len(dataset)))
        exp = explainer.explain_instance(
            " ".join(dataset.examples[i].words),
            classify_sentence_partial,
            labels=(1, ),
            num_samples=config_dict["lime_num_samples"],
        )
        lst = exp.as_map()[1]

        lst.sort(key=(lambda x: x[0]))
        dataset.examples[i].predictions = list(map(lambda x: x[1], lst))
    return dataset
Example No. 9
    def __init__(self, model_path=None, cuda_device=1):
        # model_path = model_path or LSTM_MODEL_PATH
        model_path = model_path or ROBERTA_MODEL_PATH
        self.predictor = Predictor.from_path(model_path,
                                             cuda_device=cuda_device)

        _tokenizer = PretrainedTransformerTokenizer(
            model_name="roberta-base", max_length=TRANSFORMER_WORDPIECE_LIMIT)
        class_name_mapper = {"0": "Negative", "1": "Positive"}
        _model = self.predictor._model
        _label_namespace = _model._label_namespace
        class_names = [
            class_name_mapper[_model.vocab.get_index_to_token_vocabulary(
                _label_namespace).get(0)],
            class_name_mapper[_model.vocab.get_index_to_token_vocabulary(
                _label_namespace).get(1)]
        ]
        # reset the tokenizer to remove separators
        self.tokenizer = lambda s: [
            t.text.replace("Ġ", "").replace('Ċ', '').replace('ĉ', "")
            for t in _tokenizer.tokenize(s)
        ][1:-1]
        self.explainer_lime = LimeTextExplainer(
            class_names=class_names, split_expression=self.tokenizer)
        self.explainer_integrate = IntegratedGradient(self.predictor)
        self.explainer_simple = SimpleGradient(self.predictor)
Example No. 10
    def run(self, input_text, print_results=True):
        output = self.classify_text(input_text, True)
        outputlen = len(output)

        classes = output[0:int((outputlen / 2) - 1)]
        scores = output[int((outputlen / 2)):outputlen - 1]

        if print_results:
            print('Intents: ' + '\t'.join(map(str, classes)))
            print('Scores: ' + '\t'.join(map(str, scores)))

        # explain class
        explainer = LimeTextExplainer(class_names=classes)
        exp = explainer.explain_instance(input_text,
                                         self.classify_text,
                                         num_features=7,
                                         top_labels=3,
                                         num_samples=1000)

        # print explanation
        if print_results:
            print("")
            print('Explanation for class %s' % classes[0])
            print('\n'.join(map(str, exp.as_list(label=0))))

        return exp
Example No. 11
def explainer(args, text, num_samples: int = 20):
    """Run LIME explainer on provided classifier"""

    model = WrapedSenti(args)
    predictor = model.predict

    # Create a LimeTextExplainer
    explainer = LimeTextExplainer(
        # Specify split option
        split_expression=lambda x: x.split(),
        # Our classifier uses bigrams or contextual ordering to classify text.
        # Hence, order matters, and we cannot use bag of words.
        bow=False,
        class_names=["neutral", "positive", "negative"],
    )

    # Make a prediction and explain it:
    exp = explainer.explain_instance(
        text,
        classifier_fn=predictor,
        top_labels=1,
        num_features=20,
        num_samples=num_samples,
    )
    return exp
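A usage sketch, assuming an args object accepted by WrapedSenti (the class name is taken as-is from the snippet):

exp = explainer(args, "The film was surprisingly good.")
label = exp.available_labels()[0]
print(exp.as_list(label=label))  # (token, weight) pairs for the top label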
Example No. 12
 def create_explainer_object(self):
     explainer = LimeTextExplainer(
         split_expression=self.tokenize_string,
         # bow=True,
         class_names=self.class_names
     )
     return explainer
Example No. 13
def classifier(request, format=None):
    tm_classifier = Pickle.objects.get(name='clf')
    classifier = tm_classifier.pickled_model
    tm_vectorizer = Pickle.objects.get(name='tfidf')
    vectorizer = tm_vectorizer.pickled_model
    input_text = request.data.get('description', 'ERROR')
    if not input_text:
        response = {'error': 'Input is an empty string'}
        return Response(response, status=status.HTTP_404_NOT_FOUND)
    standardized_text = standardize_text(input_text)
    explainer = LimeTextExplainer(class_names=GRADE_CATEGORIES)
    c = make_pipeline(vectorizer, classifier)
    exp = explainer.explain_instance(standardized_text,
                                     c.predict_proba,
                                     num_features=6,
                                     labels=[0, 1, 2, 3])
    predict_probas = dict(zip(exp.class_names, exp.predict_proba))
    prediction = max(predict_probas.items(), key=itemgetter(1))[0]
    response = {
        'final_prediction': prediction,
        'ordered_class_names': exp.class_names,
        'predict_probas': predict_probas,
        'as_list': {
            exp.class_names[lbl]: exp.as_list(label=lbl)
            for lbl in exp.available_labels()
        },
        'standardized_text': standardized_text
    }
    return Response(response, status=status.HTTP_201_CREATED)
Example No. 14
def explainer(method: str, path_to_file: str, text: str, lowercase: bool,
              num_samples: int) -> "Explanation":  # returns a lime Explanation, not the explainer
    """Run LIME explainer on provided classifier"""

    model = explainer_class(method, path_to_file)
    predictor = model.predict
    # Lower case the input text if requested (for certain classifiers)
    if lowercase:
        text = text.lower()

    # Create a LimeTextExplainer
    explainer = LimeTextExplainer(
        # Specify split option
        split_expression=lambda x: x.split(),
        # Our classifier uses trigrams or contextual ordering to classify text.
        # Hence, order matters, and we cannot use bag of words.
        bow=False,
        # Specify class names for this case
        class_names=[1, 2, 3, 4, 5])

    # Make a prediction and explain it:
    exp = explainer.explain_instance(
        text,
        classifier_fn=predictor,
        top_labels=1,
        num_features=20,
        num_samples=num_samples,
    )
    return exp
Example No. 15
 def test_lime_text_explainer_bad_regressor(self):
     newsgroups_train = fetch_20newsgroups(subset='train')
     newsgroups_test = fetch_20newsgroups(subset='test')
     # making class names shorter
     class_names = [
         x.split('.')[-1] if 'misc' not in x else '.'.join(
             x.split('.')[-2:]) for x in newsgroups_train.target_names
     ]
     class_names[3] = 'pc.hardware'
     class_names[4] = 'mac.hardware'
     vectorizer = TfidfVectorizer(lowercase=False)
     train_vectors = vectorizer.fit_transform(newsgroups_train.data)
     test_vectors = vectorizer.transform(newsgroups_test.data)
     nb = MultinomialNB(alpha=.01)
     nb.fit(train_vectors, newsgroups_train.target)
     pred = nb.predict(test_vectors)
     f1_score(newsgroups_test.target, pred, average='weighted')
     c = make_pipeline(vectorizer, nb)
     explainer = LimeTextExplainer(class_names=class_names)
     idx = 1340
     with self.assertRaises(TypeError):
         exp = explainer.explain_instance(  # noqa:F841
             newsgroups_test.data[idx],
             c.predict_proba,
             num_features=6,
             labels=[0, 17],
             model_regressor=Lasso())
Example No. 16
 def get_result_per_word(self, text, num_samples):
     if not self.intention_names:
         return {}
     explainer = LimeTextExplainer(class_names=self.intention_names)
     labels = list(range(len(self.intention_names)))  # List
     try:
         exp = explainer.explain_instance(text,
                                          self.parse,
                                          num_features=6,
                                          labels=labels,
                                          num_samples=num_samples)
     except ValueError:
         labels = []  # no explanation could be built; the loop below is skipped
     result_per_word = {}
     for label in labels:
         for j in exp.as_list(label=label):
             word, weight = j
             if word not in result_per_word:
                 result_per_word[word] = []
             result_per_word[word].append({
                 "intent": self.intention_names[label],
                 "relevance": weight * 100,
             })
     for word in result_per_word:
         result_per_word[word] = sorted(result_per_word[word],
                                        key=lambda k: k.get("relevance"),
                                        reverse=True)
     return result_per_word
Example No. 17
def final_yok_classifing(sentence):
    # class names in Korean: 'not profanity' / 'profanity'
    class_names = ['욕설이 아님', '욕설']
    explainer = LimeTextExplainer(class_names=class_names)
    exp = explainer.explain_instance(sentence[0],
                                     yok_classifier_lime,
                                     num_features=100)
    return exp.as_list()
Example No. 18
    def get_result_per_intent(self, text, num_samples):
        explainer = LimeTextExplainer(class_names=self.intention_names)
        labels = list(range(len(self.intention_names)))  # List
        exp = explainer.explain_instance(text,
                                         self.parse,
                                         num_features=6,
                                         labels=labels,
                                         num_samples=num_samples)
        result_per_intent = {}
        for intent in self.intention_names:
            result_per_intent[intent] = []
        for i in labels:
            intent_sum = 0
            for j in exp.as_list(label=i):
                word, weight = j
                result_per_intent[self.intention_names[i]].append({
                    "word": word,
                    "relevance": weight * 100,
                })
                intent_sum += weight
            result_per_intent[self.intention_names[i]].append({
                "sum": intent_sum,
                "relevance": -1,
            })
        for intent in result_per_intent:
            result_per_intent[intent] = sorted(
                result_per_intent[intent],
                key=lambda k: k.get("relevance"),
                reverse=True,
            )

        return result_per_intent
Example No. 19
def get_lime(model, test_tokens, model_name):
    explainer = LimeTextExplainer(class_names=["genuine", "deceptive"],
                                  split_expression=r'\s+')
    W = []
    for idx, text in enumerate(test_tokens):
        tmp_d = {}
        for i in text.split():
            tmp_d[i] = 1
        exp = explainer.explain_instance(text,
                                         partial(wrapper_clf_predict,
                                                 model=model,
                                                 model_name=model_name),
                                         num_features=len(text.split()),
                                         num_samples=1000)
        if len(tmp_d) != len(exp.as_list()):
            print(idx, len(tmp_d), len(dict(exp.as_list())))
        W.append(dict(exp.as_list()))
        if (idx + 1) % 10 == 0:
            print('{} instances have been processed..'.format(idx + 1))
    features_l, scores_l = [], []
    for d in W:
        features, scores = [], []
        for key, score in d.items():
            features.append(key)
            scores.append(score)  # abs value should be taken subsequently
        tmp = ' '.join(features)  # join once, after the loop
        features_l.append(tmp)
        scores_l.append(scores)
    return features_l, scores_l
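As the inline comment notes, absolute values are left to the caller. One way the returned pair of lists might be consumed, with features_l/scores_l as produced above (the helper below is illustrative, not part of the snippet):

def top_k(joined_features, scores, k=5):
    # rank tokens by |LIME weight|
    pairs = zip(joined_features.split(), scores)
    return sorted(pairs, key=lambda p: abs(p[1]), reverse=True)[:k]

print(top_k(features_l[0], scores_l[0]))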
Example No. 20
    def explain_one_example(self, idx=None, num_features=5, print_out=True):
        '''Explains predictions for a single datapoint with LIME.

        If the index of the datapoint is not specified, explains a random
        point from the validation data. Optionally prints out the explanation.

        # Arguments:
            idx: int, index of a datapoint in the validation data (default=None)
            num_features: int, number of explanatory features (default=5)
            print_out: boolean (default=True)

        # Returns:
            exp: lime.explanation.Explanation object
        '''
        if idx is None:
            idx = np.random.choice(self.for_explanation.index)

        explainer = LimeTextExplainer(class_names=self.class_names)
        exp = explainer.explain_instance(self.for_explanation[idx],
                                         self.predict_proba,
                                         num_features=num_features)

        if print_out:
            print('Tweet {}: {}'.format(idx, self.for_explanation[idx]))
            print(self.predict_proba([self.for_explanation[idx]]))
            print(exp.as_pyplot_figure())
            plt.show()
        return exp
Example No. 21
 def __init__(self):
     self.model = pickle.load(open("models/rf.pkl", 'rb'))
     self.class_names = [
         'negative', 'somewhat negative', 'neutral', 'somewhat positive',
         'positive'
     ]
     self.explainer = LimeTextExplainer(class_names=self.class_names)
Example No. 22
def export_explanations(model, X_test=X_test, class_names=["Irrelevante", "Evidência"], name="name", n_exp=20):
    '''
    Arguments:
        model: a pipeline (vectorizer + model) to explain.
        X_test: the subset of test data used to compute the best coverage of features to explain.
        class_names: class labels (Portuguese for "Irrelevant" / "Evidence").
        name: prefix used to name the output files on disk.
        n_exp: number of explanations to pick.
    '''
    explainer = LimeTextExplainer(class_names=class_names)

    # Submodular pick: select a representative, diverse set of explanations
    sp_obj = submodular_pick.SubmodularPick(explainer, X_test, model.predict_proba,
                                            sample_size=n_exp, num_features=15,
                                            num_exps_desired=n_exp)
    imagens = [exp.as_pyplot_figure(label=exp.available_labels()[0])
               for exp in sp_obj.sp_explanations]
    for i, exp in enumerate(sp_obj.sp_explanations):
        exp.save_to_file(file_path="{}explanation{}.html".format(name, i))
    for i, img in enumerate(imagens):
        img.savefig("{}Imagem{}".format(name, i))
        plt.close('all')
Example No. 23
def explain_prediction(sent, file_name):
    #    vect=transform_inp_sent_to_vect(sent)
    labels = get_categories(sent, file_name)
    explainer = LimeTextExplainer(class_names=labels)

    exp = explainer.explain_instance(sent, spacy_prediction, labels=[0, 1])
    return exp.save_to_file(r'{}explanation.html'.format(DIRECTORY_PATH))
Example No. 24
def limevisual(pData, pDesc, Idx, pClassNames, pAccountName, pVec,
               nNumFeatures, nTopLabels, tLabels, pRootDir):
    try:
        pIntent = pData['Intent'][int(Idx)]
        _, pModels = loadmodel(pRootDir, pAccountName, pIntent)
        pPipeModel = make_pipeline(pVec, pModels)
        token_pattern = re.compile(r"(?u)\b\w\w+\b")  # compile once, not on every call
        tokenizer = lambda doc: token_pattern.findall(doc)
        pExplainer = LimeTextExplainer(class_names=pClassNames,
                                       split_expression=tokenizer)
        pExplainText = pExplainer.explain_instance(
            pData[pDesc][int(Idx)],
            classifier_fn=pPipeModel.predict_proba,
            num_features=int(nNumFeatures),
            top_labels=int(nTopLabels))
        pExplainText.show_in_notebook(text=pData[pDesc][int(Idx)],
                                      labels=tLabels)
        pExplainText.save_to_file(
            'C:\\Users\\tamohant\\Desktop\\Auto_synthesis_Training_data\\AutoSynthesisLite\\demo.html',
            labels=None,
            predict_proba=True,
            show_predicted_value=True)
    except Exception as e:
        print(
            '*** ERROR[001]: Error in visualization file of Limevisual function: ',
            sys.exc_info()[0], str(e))
        print(traceback.format_exc())
        return -1
    return 0
Example No. 25
def lime_explanation(classifier, data, features=MAX_FEATURES):
    explainer = LimeTextExplainer(class_names=CLASS_NAMES)
    explanation = explainer.explain_instance(
        text_instance=data,
        classifier_fn=classifier.predict_proba,
        num_features=features,
    )
    return explanation
Example No. 26
    def set_parameters(self, **kwargs):
        """Parameter setter for lime_text.

        # Arguments
            **kwargs: Parameters setter. For more detail, please check https://lime-ml.readthedocs.io/en/latest/index.html.
        """
        class_names = kwargs.pop("class_names", self.class_names)
        self.explainer = LimeTextExplainer(class_names=class_names, **kwargs)
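Since the remaining **kwargs are forwarded straight to the LimeTextExplainer constructor, any of its options can be set through this method. A hypothetical call, assuming an instance expl of this class:

expl.set_parameters(class_names=["negative", "positive"],
                    bow=False,                # keep word-order information
                    split_expression=r"\s+")  # any constructor kwarg passes through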
Example No. 27
def explain_instance(headline, body):
    text = combine_sentence(headline, body)
    explainer = LimeTextExplainer(class_names=CLASS_NAMES)
    return explainer.explain_instance(text,
                                      classifier_fn,
                                      labels=[0, 1, 2, 3],
                                      top_labels=4,
                                      num_samples=4)
Example No. 28
 def __init__(self, class_names, count_vectorizer):
     self.__class_name = class_names
     # Text Explainer for explaining the selected examples.
     # Reference: https://arxiv.org/abs/1602.04938
     # The Explanations help us to check the reliability and validity of the trained machine learning model.
     # The Explanations confirm that the model chooses the right label/class for the right reason (e.g. meaningful words/features).
     self.__explainer = LimeTextExplainer(class_names=class_names)
     self.__count_vectorizer = count_vectorizer
Example No. 29
 def __init__(self, class_names):
     self.class_names = class_names
     self.explainer = LimeTextExplainer(class_names=class_names)
     self.num_features = 20
     self.num_samples = 20
     self.use_top_labels = True
     self.top_labels = 2
     self.investigate_labels = list(range(len(class_names)))
Example No. 30
def limer(example):
    # show the explanation in a lime graph
    # TODO: return as HTML instead of rendering in a notebook
    #       (find an HTML API rather than the notebook one)
    explainer = LimeTextExplainer()
    exp = explainer.explain_instance(spacing_example(example),
                                     lambda s: do_inference(s, True).detach().numpy(),
                                     top_labels=1)
    exp.show_in_notebook()
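On the TODO above: LIME's Explanation object already has an HTML API, so the notebook call can be swapped out directly. A minimal sketch:

html = exp.as_html()             # standalone HTML document as a string
exp.save_to_file("lime.html")    # or write it straight to disk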