Example #1
def test_explain_linear_dense():
    clf = LogisticRegression(random_state=42)
    data = [{
        'day': 'mon',
        'moon': 'full'
    }, {
        'day': 'tue',
        'moon': 'rising'
    }, {
        'day': 'tue',
        'moon': 'rising'
    }, {
        'day': 'mon',
        'moon': 'rising'
    }]
    vec = DictVectorizer(sparse=False)
    X = vec.fit_transform(data)
    clf.fit(X, [0, 1, 1, 0])
    test_day = {'day': 'tue', 'moon': 'full'}
    target_names = ['sunny', 'shady']
    res1 = explain_prediction(clf,
                              test_day,
                              vec=vec,
                              target_names=target_names)
    expl_text, expl_html = format_as_all(res1, clf)
    assert 'day=tue' in expl_text
    assert 'day=tue' in expl_html
    [test_day_vec] = vec.transform(test_day)
    res2 = explain_prediction(clf,
                              test_day_vec,
                              target_names=target_names,
                              vectorized=True,
                              feature_names=vec.get_feature_names())
    assert res1 == res2
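Note: the test above exercises both calling conventions of eli5.explain_prediction, a raw dict routed through vec, and an already-vectorized row passed with vectorized=True plus explicit feature_names, and asserts they agree. A minimal standalone sketch of the same pattern (assuming only scikit-learn and eli5 are installed; this is not part of the original test suite):

# Sketch: the two equivalent ways to call explain_prediction.
import eli5
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LogisticRegression

vec = DictVectorizer(sparse=False)
X = vec.fit_transform([{'day': 'mon'}, {'day': 'tue'}])
clf = LogisticRegression().fit(X, [0, 1])

doc = {'day': 'tue'}
expl_raw = eli5.explain_prediction(clf, doc, vec=vec)          # eli5 vectorizes the doc
[row] = vec.transform(doc)
expl_vec = eli5.explain_prediction(clf, row, vectorized=True,  # pre-vectorized row
                                   feature_names=vec.get_feature_names())
print(eli5.format_as_text(expl_raw))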
def explainELI5(rows, name, classes, XY):
    import eli5

    cols = XY[2]

    # loadModel, ExplainPath and uniquify are project-level helpers defined elsewhere.
    clf = loadModel(name + "-" + classes)

    ExplainPath_Specific = ExplainPath + name + "_" + classes

    weights = eli5.explain_weights(clf, feature_names=cols, top=len(cols) + 1)
    predictionA = eli5.explain_prediction(clf, XY[0][0], feature_names=cols, top=len(cols) + 1)
    predictionB = eli5.explain_prediction(clf, XY[0][1], feature_names=cols, top=len(cols) + 1)

    # The `with` blocks close the files automatically; no explicit close() is needed.
    html = eli5.format_as_html(weights)
    with open(uniquify(ExplainPath_Specific + "/" + classes + "_ELI5_WEIGHTS.html"), 'w') as f:
        f.write(html)

    html = eli5.format_as_html(predictionA)
    with open(uniquify(ExplainPath_Specific + "/" + classes + "_ELI5_PREDICTION_A.html"), 'w') as f:
        f.write(html)

    html = eli5.format_as_html(predictionB)
    with open(uniquify(ExplainPath_Specific + "/" + classes + "_ELI5_PREDICTION_B.html"), 'w') as f:
        f.write(html)
def assert_binary_linear_classifier_explained(newsgroups_train_binary, clf,
                                              explain_prediction):
    docs, y, target_names = newsgroups_train_binary
    vec = TfidfVectorizer()

    X = vec.fit_transform(docs)
    clf.fit(X, y)

    assert y[2] == 1
    cg_document = docs[2]
    res = explain_prediction(clf, cg_document, vec=vec,
                             target_names=target_names, top=20)
    expl_text, expl_html = format_as_all(res, clf)
    for expl in [expl_text, expl_html]:
        assert 'software' in expl or 'thanks' in expl
        assert target_names[1] in expl

    assert y[15] == 0
    atheism_document = docs[15]
    res = explain_prediction(clf, atheism_document, vec=vec,
                             target_names=target_names, top=20)
    expl_text, expl_html = format_as_all(res, clf)
    for expl in [expl_text, expl_html]:
        assert 'god' in expl
        assert target_names[0] in expl

    assert_correct_class_explained_binary(clf, X[::10])
Example #4
def test_explain_prediction_unsupported():
    clf = BaseEstimator()
    doc = 'doc'
    res = explain_prediction(clf, doc)
    assert 'BaseEstimator' in res.error
    with pytest.raises(TypeError):
        explain_prediction(clf, doc, unknown_argument=True)
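When eli5 has no handler for an estimator, explain_prediction does not raise: it returns an Explanation object whose .error field names the estimator, while unknown keyword arguments still raise TypeError. A quick sketch of that behavior, assuming a minimal setup:

# Sketch: unsupported estimators produce an Explanation with .error set.
import eli5
from sklearn.base import BaseEstimator

res = eli5.explain_prediction(BaseEstimator(), 'doc')
print(res.error)  # the message names BaseEstimator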
Example #5
def test_explain_hashing_vectorizer(newsgroups_train_binary):
    # test that we can pass InvertableHashingVectorizer explicitly
    vec = HashingVectorizer(n_features=1000)
    ivec = InvertableHashingVectorizer(vec)
    clf = LogisticRegression(random_state=42)
    docs, y, target_names = newsgroups_train_binary
    ivec.fit([docs[0]])
    X = vec.fit_transform(docs)
    clf.fit(X, y)

    get_res = lambda **kwargs: explain_prediction(
        clf, docs[0], vec=ivec, target_names=target_names, top=20, **kwargs)
    res = get_res()
    check_explain_linear_binary(res, clf)
    assert res == get_res()
    res_vectorized = explain_prediction(clf,
                                        vec.transform([docs[0]])[0],
                                        vec=ivec,
                                        target_names=target_names,
                                        top=20,
                                        vectorized=True)
    pprint(res_vectorized)
    assert res_vectorized == _without_weighted_spans(res)

    assert res == get_res(feature_names=ivec.get_feature_names(
        always_signed=False))
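InvertableHashingVectorizer wraps a HashingVectorizer and learns a hash-bucket-to-term mapping from sample documents, which is how the test above recovers readable feature names. A short self-contained sketch of that setup:

# Sketch: recovering term names from a hashed feature space with eli5.
from sklearn.feature_extraction.text import HashingVectorizer
from eli5.sklearn import InvertableHashingVectorizer

docs = ['computer graphics rendering', 'atheism and religion']  # any raw text sample
vec = HashingVectorizer(n_features=1000)
ivec = InvertableHashingVectorizer(vec)
ivec.fit(docs)  # learns which terms hash to which buckets
names = ivec.get_feature_names(always_signed=False)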
Example #6
def test_explain_prediction_booster_multitarget(newsgroups_train):
    docs, ys, target_names = newsgroups_train
    vec = CountVectorizer(stop_words='english', dtype=np.float64)
    xs = vec.fit_transform(docs)
    clf = lightgbm.train(params={
        'objective': 'multiclass',
        'verbose_eval': -1,
        'max_depth': 2,
        'n_estimators': 100,
        'min_child_samples': 1,
        'min_child_weight': 1,
        'num_class': len(target_names)
    },
                         train_set=lightgbm.Dataset(xs.toarray(), label=ys))

    doc = 'computer graphics in space: a new religion'
    res = explain_prediction(clf, doc, vec=vec, target_names=target_names)
    format_as_all(res, clf)
    check_targets_scores(res)
    graphics_weights = res.targets[1].feature_weights
    assert 'computer' in get_all_features(graphics_weights.pos)
    religion_weights = res.targets[3].feature_weights
    assert 'religion' in get_all_features(religion_weights.pos)

    top_target_res = explain_prediction(clf, doc, vec=vec, top_targets=2)
    assert len(top_target_res.targets) == 2
    assert sorted(t.proba for t in top_target_res.targets) == sorted(
        t.proba for t in res.targets)[-2:]
Example #7
def assert_linear_regression_explained(boston_train, reg, explain_prediction):
    X, y, feature_names = boston_train
    reg.fit(X, y)
    res = explain_prediction(reg, X[0])
    expl_text, expl_html = format_as_all(res, reg)

    assert len(res.targets) == 1
    target = res.targets[0]
    assert target.target == 'y'
    pos, neg = (get_all_features(target.feature_weights.pos),
                get_all_features(target.feature_weights.neg))
    assert 'x11' in pos or 'x11' in neg

    if has_intercept(reg):
        assert '<BIAS>' in pos or '<BIAS>' in neg
        assert '<BIAS>' in expl_text
        assert '&lt;BIAS&gt;' in expl_html
    else:
        assert '<BIAS>' not in pos and '<BIAS>' not in neg
        assert '<BIAS>' not in expl_text
        assert 'BIAS' not in expl_html

    for expl in [expl_text, expl_html]:
        assert 'x11' in expl
        assert '(score' in expl
    assert "'y'" in expl_text
    assert '<b>y</b>' in strip_blanks(expl_html)

    assert res == explain_prediction(reg, X[0])
def assert_multitarget_linear_regression_explained(reg, explain_prediction):
    X, y = make_regression(n_samples=100, n_targets=3, n_features=10,
                           random_state=42)
    reg.fit(X, y)
    res = explain_prediction(reg, X[0])
    expl_text, expl_html = format_as_all(res, reg)

    assert len(res.targets) == 3
    target = res.targets[1]
    assert target.target == 'y1'
    pos, neg = (get_all_features(target.feature_weights.pos),
                get_all_features(target.feature_weights.neg))
    assert 'x8' in pos or 'x8' in neg
    if has_intercept(reg):
        assert '<BIAS>' in pos or '<BIAS>' in neg

    assert 'x8' in expl_text
    if has_intercept(reg):
        assert '<BIAS>' in expl_text
    assert "'y2'" in expl_text

    assert res == explain_prediction(reg, X[0])
    check_targets_scores(res)

    top_targets_res = explain_prediction(reg, X[0], top_targets=1)
    assert len(top_targets_res.targets) == 1
Example #9
def assert_linear_regression_explained(boston_train,
                                       reg,
                                       explain_prediction,
                                       atol=1e-8,
                                       reg_has_intercept=None):
    X, y, feature_names = boston_train
    reg.fit(X, y)
    res = explain_prediction(reg, X[0], feature_names=feature_names)
    expl_text, expl_html = expls = format_as_all(res, reg)

    assert len(res.targets) == 1
    target = res.targets[0]
    assert target.target == 'y'
    get_pos_neg_features = lambda fw: (
        get_all_features(fw.pos, with_weights=True),
        get_all_features(fw.neg, with_weights=True))
    pos, neg = get_pos_neg_features(target.feature_weights)
    assert 'LSTAT' in pos or 'LSTAT' in neg

    if reg_has_intercept is None:
        reg_has_intercept = has_intercept(reg)
    if reg_has_intercept:
        assert '<BIAS>' in pos or '<BIAS>' in neg
        assert '<BIAS>' in expl_text
        assert '&lt;BIAS&gt;' in expl_html
    else:
        assert '<BIAS>' not in pos and '<BIAS>' not in neg
        assert '<BIAS>' not in expl_text
        assert 'BIAS' not in expl_html

    for expl in [expl_text, expl_html]:
        assert 'LSTAT' in expl
        assert '(score' in expl
    assert "'y'" in expl_text
    assert '<b>y</b>' in strip_blanks(expl_html)

    for expl in expls:
        assert_feature_values_present(expl, feature_names, X[0])

    assert res == explain_prediction(reg, X[0], feature_names=feature_names)
    check_targets_scores(res, atol=atol)

    flt_res = explain_prediction(
        reg,
        X[0],
        feature_names=feature_names,
        feature_filter=lambda name, v: name != 'LSTAT')
    format_as_all(flt_res, reg)
    flt_target = flt_res.targets[0]
    flt_pos, flt_neg = get_pos_neg_features(flt_target.feature_weights)
    assert 'LSTAT' not in flt_pos and 'LSTAT' not in flt_neg
    flt_all = dict(flt_pos, **flt_neg)
    expected = dict(pos, **neg)
    expected.pop('LSTAT')
    assert flt_all == expected
def test_unsupported():
    vec = CountVectorizer()
    clf = BaseEstimator()
    doc = 'doc'
    res = explain_prediction(clf, doc, vec=vec)
    assert 'BaseEstimator' in res.error
    for expl in format_as_all(res, clf):
        assert 'Error' in expl
        assert 'BaseEstimator' in expl
    with pytest.raises(TypeError):
        explain_prediction(clf, doc, unknown_argument=True)
def test_explain_tree_regressor_multitarget(reg):
    X, y = make_regression(n_samples=100, n_targets=3, n_features=10,
                           random_state=42)
    reg.fit(X, y)
    res = explain_prediction(reg, X[0])
    for expl in format_as_all(res, reg):
        for target in ['y0', 'y1', 'y2']:
            assert target in expl
        assert 'BIAS' in expl
        assert any('x%d' % i in expl for i in range(10))
    check_targets_scores(res)

    top_targets_res = explain_prediction(reg, X[0], top_targets=1)
    assert len(top_targets_res.targets) == 1
Example #12
def test_explain_regression_hashing_vectorizer(newsgroups_train_binary):
    docs, y, target_names = newsgroups_train_binary
    vec = HashingVectorizer(norm=None)
    clf = LinearRegression()
    clf.fit(vec.fit_transform(docs), y)

    # Setting large "top" in order to compare it with CountVectorizer below
    # (due to small differences in the coefficients they might have cutoffs
    # at different points).
    res = explain_prediction(clf,
                             docs[0],
                             vec=vec,
                             target_names=[target_names[1]],
                             top=1000)
    expl, _ = format_as_all(res, clf)
    assert len(res.targets) == 1
    e = res.targets[0]
    assert e.target == 'comp.graphics'
    neg = get_all_features(e.feature_weights.neg)
    assert 'objective' in neg
    assert 'that' in neg
    assert 'comp.graphics' in expl
    assert 'objective' in expl
    assert 'that' in expl

    # HashingVectorizer with norm=None is "the same" as CountVectorizer,
    # so we can compare it and check that explanation is almost the same.
    count_vec = CountVectorizer()
    count_clf = LinearRegression()
    count_clf.fit(count_vec.fit_transform(docs), y)
    count_res = explain_prediction(count_clf,
                                   docs[0],
                                   vec=count_vec,
                                   target_names=[target_names[1]],
                                   top=1000)
    pprint(count_res)
    count_expl, _ = format_as_all(count_res, count_clf)
    print(count_expl)

    for key in ['pos', 'neg']:
        values, count_values = [
            sorted(get_names_coefs(getattr(r.targets[0].feature_weights, key)))
            for r in [res, count_res]
        ]
        assert len(values) == len(count_values)
        for (name, coef), (count_name,
                           count_coef) in zip(values, count_values):
            assert name == count_name
            assert abs(coef - count_coef) < 0.05
def test_explain_tree_clf_multiclass(clf, iris_train):
    X, y, feature_names, target_names = iris_train
    clf.fit(X, y)
    res = explain_prediction(
        clf, X[0], target_names=target_names, feature_names=feature_names)
    for expl in format_as_all(res, clf):
        for target in target_names:
            assert target in expl
        assert 'BIAS' in expl
        assert any(f in expl for f in feature_names)
        assert_feature_values_present(expl, feature_names, X[0])
    check_targets_scores(res)

    top_targets_res = explain_prediction(clf, X[0], top_targets=1)
    assert len(top_targets_res.targets) == 1
Example #14
def assert_tree_explain_prediction_single_target(clf, X, feature_names):
    get_res = lambda _x, **kwargs: explain_prediction(
        clf, _x, feature_names=feature_names, **kwargs)
    res = get_res(X[0])
    for expl in format_as_all(res, clf):
        assert_feature_values_present(expl, feature_names, X[0])

    checked_flt = False
    all_expls = []
    for x in X[:5]:
        res = get_res(x)
        text_expl = format_as_text(res, show=fields.WEIGHTS)
        print(text_expl)
        assert '<BIAS>' in text_expl
        check_targets_scores(res)
        all_expls.append(text_expl)

        get_all = lambda fw: get_all_features(fw.pos) | get_all_features(fw.neg)
        all_features = get_all(res.targets[0].feature_weights)
        if len(all_features) > 1:
            f = list(all_features - {'<BIAS>'})[0]
            flt_res = get_res(x, feature_filter=lambda name, _: name != f)
            flt_features = get_all(flt_res.targets[0].feature_weights)
            assert flt_features == (all_features - {f})
            checked_flt = True

    assert checked_flt
    assert any(f in ''.join(all_expls) for f in feature_names)
Example #15
def explain(model_path):
    wordseg = Wordsegmenter("./bin/pyseg.so", "./bin/qsegconf.ini")
    sent_word2vec_path = "./data/word2vec.query.bin"
    sent_vocab_path = "./data/word2vec.query.vocab"
    sent_model_path = "./data/sif.model"

    sent_word2vec = KeyedVectors.load_word2vec_format(sent_word2vec_path,
                                                      binary=True)
    sent_vocab_dict = load_vocab(sent_vocab_path)
    sent_model = joblib.load(sent_model_path)

    tfidf_count_hash_vectorModels = VectorModels()

    ner_dict_path = "./data/ner.dict"
    syn_dict_path = "./data/syn.dict"
    ner_dict, syn_dict = load_ner_dict(ner_dict_path, syn_dict_path)

    model = joblib.load(model_path)

    pd.set_option('display.max_rows', None)

    explanation = eli5.explain_weights(model, top=None)
    explanation = eli5.format_as_text(explanation)
    print(explanation)

    feature_names = []
    column_names = ["qid", "ql", "qr"]
    #reader = pd.read_csv(in_path, sep="\t", dtype="str", names=column_names, chunksize=100)
    reader = pd.read_csv(sys.stdin,
                         sep="\t",
                         dtype="str",
                         names=column_names,
                         chunksize=1)
    first_chunk = True
    feature_extractor = lambda row: extract_features(
        wordseg, row["ql"], row["qr"], tfidf_count_hash_vectorModels,
        sent_word2vec, sent_vocab_dict, sent_model, ner_dict, syn_dict)
    for data in reader:
        _ = data.fillna("", inplace=True)

        X = data[["ql", "qr"]].apply(feature_extractor, axis=1)
        X_features = X.apply(pd.Series)
        feature_names = X_features.columns.values.tolist()
        X_features = X_features[feature_names]
        y_preds = model.predict_proba(X_features,
                                      ntree_limit=model.best_ntree_limit)
        y_preds = [o[1] for o in y_preds]
        data = pd.concat([data, X_features], axis=1)
        data = data.assign(predict=y_preds)

        #if first_chunk:
        #    data.to_csv(in_path + ".predict", header=True, sep="\t", mode="w")
        #    first_chunk = False
        #else:
        #    data.to_csv(in_path + ".predict", header=False, sep="\t", mode="a")
        data.to_csv(sys.stdout, header=False, sep="\t")
        explanation = eli5.explain_prediction(model, X_features.iloc[0])
        explanation = eli5.format_as_text(explanation)
        print(explanation)
        print(X_features.iloc[0])
Example #16
def test_explain_linear_classifiers_unsupported_kernels(
        clf, newsgroups_train_binary):
    docs, y, target_names = newsgroups_train_binary
    vec = TfidfVectorizer()
    clf.fit(vec.fit_transform(docs), y)
    res = explain_prediction(clf, docs[0], vec=vec)
    assert 'supported' in res.error
Example #17
def explore_final_model():
  #https://github.com/gameofdimension/xgboost_explainer/blob/master/xgboost_explainer_demo.ipynb
  
  nr_labels = len(y)
  value_counts = y.value_counts()
  perc_per_label = {k:round(100 * v/float(nr_labels),2) for k,v in value_counts.items()}
  print('value counts:', y.value_counts())
  print('perc per label:', perc_per_label)

  model = pickle.load(open(filename_model, "rb"))
  model_feature_names = model.attr('feature_names').split('|')    
  index_to_class = json.loads(model.attr('index_to_class'))
  print(index_to_class)
  classes = [index_to_class[k] for k in sorted(index_to_class.keys())]
  print(classes)
  
  print('eli5 explain weights (gain):\n', eli5.format_as_text(eli5.explain_weights(model, top=10)))  # gain
  
  df_test = pd.read_json(open(test_filename, "r"))
  df_test = df_test.head(5)
  feature_extractor = FeatureExtractor(df_test)
  X_test, X_test_featurenames = feature_extractor.get_features_pred_instances(df_test, model_feature_names)
  
  
  print(X_test)
  print(set(X_test.dtypes))
#   print(X_test.iloc[0])
  print(eli5.format_as_text(eli5.explain_prediction(model, X_test.head(1), target_names=classes, top=10, feature_names=X_test_featurenames)))
Example #18
def print_eli5(click_data, category):
    pred = pd.read_csv(category + '_xy.csv')
    model = joblib.load(category + ".h5")
    pred = pred.loc[(pred['grid_x'] == click_data['points'][0]['lon']) &
                    (pred['grid_y'] == click_data['points'][0]['lat']), :]
    pred_sqr = pred['eurogrid_0250_1'].values[0]
    dane_model = df.loc[df['eurogrid_0250_1'] == pred_sqr, :]
    dict_ = eli5.format_as_dataframe(eli5.explain_weights(model))
    cols = dict_['feature'].values
    mapping = {}
    for i in range(len(cols)):
        mapping['x' + str(i)] = cols[i]


    # print(dane_model.columns)
    expl = dane_model.loc[:, cols]
    # print(expl.head())
    # eli5 expects the columns in the order the model was trained on;
    # try permutations until one is accepted.
    for cols_perm in itertools.permutations(cols):
        try:
            expl = eli5.formatters.format_as_dataframe(
                eli5.explain_prediction(model, dane_model.loc[:, list(cols_perm)]))
            break
        except Exception:
            continue
    expl['feature'] = expl['feature'].apply(lambda x: map_x(x, mapping))
    return generate_table(expl)
Example #19
def result():
    tweet = request.data.decode('utf-8')
    explain = explain_prediction(gnb,
                                 tweet,
                                 vec=tfid,
                                 target_names=['known weird', 'less weird'])
    return str(format_as_text(explain))
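Assuming the view above is registered under a /result route (the decorator is not shown here), it could be exercised with a small client like the following sketch; the route path and port are assumptions:

# Hypothetical client for the Flask view above.
import requests

resp = requests.post('http://localhost:5000/result', data='some tweet text')
print(resp.text)  # plain-text eli5 explanation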
Example #20
def assert_multiclass_linear_classifier_explained(newsgroups_train, clf,
                                                  explain_prediction):
    docs, y, target_names = newsgroups_train
    vec = TfidfVectorizer()

    X = vec.fit_transform(docs)
    clf.fit(X, y)

    get_res = lambda: explain_prediction(
        clf, docs[0], vec=vec, target_names=target_names, top=20)
    res = get_res()
    pprint(res)
    expl_text, expl_html = format_as_all(res, clf)

    for e in res.targets:
        if e.target != 'comp.graphics':
            continue
        pos = get_all_features(e.feature_weights.pos)
        assert 'file' in pos

    for expl in [expl_text, expl_html]:
        for label in target_names:
            assert str(label) in expl
        assert 'file' in expl

    assert res == get_res()
def test_explain_one_class_svm():
    X = np.array([[0, 0], [0, 1], [5, 3], [93, 94], [90, 91]])
    clf = OneClassSVM(kernel='linear', random_state=42).fit(X)
    res = explain_prediction(clf, X[0])
    assert res.targets[0].score < 0
    for expl in format_as_all(res, clf):
        assert 'BIAS' in expl
        assert 'x0' not in expl
        assert 'x1' not in expl

    res = explain_prediction(clf, X[4])
    assert res.targets[0].score > 0
    for expl in format_as_all(res, clf):
        assert 'BIAS' in expl
        assert 'x0' in expl
        assert 'x1' in expl
Example #22
def test_explain_prediction_clf_binary(newsgroups_train_binary_big):
    docs, ys, target_names = newsgroups_train_binary_big
    vec = CountVectorizer(stop_words='english', dtype=np.float64)
    clf = LGBMClassifier(n_estimators=100,
                         max_depth=2,
                         min_child_samples=1,
                         min_child_weight=1)
    xs = vec.fit_transform(docs)
    clf.fit(xs, ys)
    get_res = lambda **kwargs: explain_prediction(
        clf,
        'computer graphics in space: a sign of atheism',
        vec=vec,
        target_names=target_names,
        **kwargs)
    res = get_res()
    for expl in format_as_all(res, clf, show_feature_values=True):
        assert 'graphics' in expl
    check_targets_scores(res)
    weights = res.targets[0].feature_weights
    pos_features = get_all_features(weights.pos)
    neg_features = get_all_features(weights.neg)
    assert 'graphics' in pos_features
    assert 'computer' in pos_features
    assert 'atheism' in neg_features

    flt_res = get_res(feature_re='gra')
    flt_pos_features = get_all_features(flt_res.targets[0].feature_weights.pos)
    assert 'graphics' in flt_pos_features
    assert 'computer' not in flt_pos_features
    def fi_eli(self, instance, prediction, model):
        """
        Parameters
        ----------
        instance: array
            The instance whose prediction's interpretation(s) Altruist will
            investigate.
        prediction: None or any
            Not used in this function; kept for interface consistency.
        model:
            The machine learning model that made the prediction.

        Returns
        -------
        list
            The feature importances provided by Eli5.
        """
        fn = list(range(len(instance)))
        temp = format_as_dataframe(explain_prediction(model, instance, top=None))
        temp.drop(['target', 'value'], axis=1, inplace=True)
        temp = temp[temp.feature != '<BIAS>']

        def remove_x(x):
            # eli5 names anonymous features 'x0', 'x1', ...; recover the index.
            return int(x.replace('x', ''))

        temp['feature'] = temp['feature'].apply(remove_x)
        # Features eli5 omitted (zero weight) are added back explicitly.
        # Note: DataFrame.append was removed in pandas 2.0; use pd.concat there.
        zero = [j for j in fn if j not in temp['feature'].values]
        for z in zero:
            temp = temp.append({'feature': z, 'weight': 0}, ignore_index=True)
        temp = temp.sort_values(by=['feature'])
        return temp.values[:, 1]
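A hypothetical call, assuming fi is an instance of the (unshown) class defining fi_eli, model is a fitted scikit-learn estimator, and X[0] is one feature row:

# Sketch only; the surrounding Altruist class is not part of this listing.
importances = fi.fi_eli(X[0], None, model)  # one weight per input feature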
Example #24
def explain_pred(input_data, model):
    y_preds = []
    y_probs = []
    encoded_htmls = []
    for i in input_data:
        expl = eli5.explain_prediction(
            model.steps[-1][1],
            i,
            model.steps[0][1],
            target_names=['Compliant', 'Not Compliant'],
            top=10)
        html_explanation = format_as_html(expl,
                                          force_weights=False,
                                          show_feature_values=True).replace(
                                              "\n", "").strip()
        encoded_html = base64.b64encode(
            bytes(html_explanation, encoding='utf-8'))
        encoded_htmls.append(encoded_html)
        expl_dict = format_as_dict(expl)
        targets = expl_dict['targets'][0]
        target = targets['target']
        y_pred = 1 if target.startswith('N') else 0
        y_prob = targets['proba']
        if len(i.split()) < 3:
            # one or two words can't be non-compliant
            y_pred = 0
            y_prob = 1.0
        # Round after scaling to avoid float artifacts like '98.30000000000001%'.
        y_prob = f'{round(y_prob * 100, 1)}%'
        # Append inside the loop so every input, not just the last one,
        # contributes a row to the stacked inferences below.
        y_preds.append(y_pred)
        y_probs.append(y_prob)
    inferences = np.column_stack((y_probs, y_preds, encoded_htmls))

    return inferences
def test_explain_tree_classifier_text(clf, newsgroups_train_big):
    docs, y, target_names = newsgroups_train_big
    vec = CountVectorizer(binary=True, stop_words='english')
    X = vec.fit_transform(docs)
    clf.fit(X, y)
    res = explain_prediction(clf, docs[0], vec=vec, target_names=target_names)
    check_targets_scores(res)
    format_as_all(res, clf)
Example #26
def test_unsupported():
    vec = CountVectorizer()
    clf = BaseEstimator()
    res = explain_prediction(clf, 'hello, world', vec=vec)
    assert 'BaseEstimator' in res.error
    for expl in format_as_all(res, clf):
        assert 'Error' in expl
        assert 'BaseEstimator' in expl
Example #27
def explain_prediction_df(estimator, doc, **kwargs):
    # type: (...) -> pd.DataFrame
    """ Explain prediction and export explanation to ``pandas.DataFrame``
    All keyword arguments are passed to :func:`eli5.explain_prediction`.
    Weights of all features are exported by default.
    """
    kwargs = _set_defaults(kwargs)
    return format_as_dataframe(
        eli5.explain_prediction(estimator, doc, **kwargs))
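A hedged usage sketch: the returned frame has one row per (target, feature) pair, with columns such as target, feature and weight (plus value when feature values are available), so it can be filtered and sorted with ordinary pandas operations.

# Usage sketch, reusing a fitted regressor reg and feature row X[0]
# from the regression examples above.
df = explain_prediction_df(reg, X[0])
print(df.sort_values('weight', ascending=False).head())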
Example #28
def test_explain_prediction_single_leaf_tree(iris_train):
    X, y, feature_names, target_names = iris_train
    clf = LGBMClassifier(n_estimators=100)
    clf.fit(X, y)
    # at least one of the trees has only a single leaf

    res = explain_prediction(clf, X[0], target_names=target_names)
    format_as_all(res, clf)
    check_targets_scores(res)
Example #29
def explain_score(lead_no):
	nb_feat = 20
	lead_no = int(lead_no)
	get = test[test[ID]==lead_no].reset_index(drop=True)
	exps = eli5.explain_prediction(xgb_clf, get[xgb_clf.booster().feature_names].iloc[0], top=nb_feat)
	score_explain = eli5.format_as_html(exps, show=('targets', 'feature_importances'), show_feature_values=True)
	target_flag = get[TARGET].values[0]
	proba_target= get[TARGET_PROBA].values[0]
	proba_target = round(proba_target, 3)
	return render_template('explain.html', lead_no=lead_no, score_explain=score_explain, target_flag=target_flag, proba_target=proba_target, nb_feat=nb_feat)
def test_explain_prediction_pandas(reg, boston_train):
    pd = pytest.importorskip('pandas')
    X, y, feature_names = boston_train
    df = pd.DataFrame(X, columns=feature_names)
    reg.fit(df, y)
    res = explain_prediction(reg, df.iloc[0])
    for expl in format_as_all(res, reg):
        assert 'PTRATIO' in expl
        if has_intercept(reg):
            assert 'BIAS' in expl