def assert_multitarget_linear_regression_explained(reg, explain_prediction):
    """Fit a 3-target regressor and validate its prediction explanation."""
    X, y = make_regression(n_samples=100, n_targets=3, n_features=10,
                           random_state=42)
    reg.fit(X, y)
    explanation = explain_prediction(reg, X[0])
    text, html = format_as_all(explanation, reg)

    assert len(explanation.targets) == 3
    second = explanation.targets[1]
    assert second.target == 'y1'
    positive = get_all_features(second.feature_weights.pos)
    negative = get_all_features(second.feature_weights.neg)
    assert 'x8' in positive or 'x8' in negative
    if has_intercept(reg):
        assert '<BIAS>' in positive or '<BIAS>' in negative

    assert 'x8' in text
    if has_intercept(reg):
        assert '<BIAS>' in text
    assert "'y2'" in text

    # Explanations must be deterministic across repeated calls.
    assert explanation == explain_prediction(reg, X[0])
    check_targets_scores(explanation)

    # top_targets limits how many targets are reported.
    limited = explain_prediction(reg, X[0], top_targets=1)
    assert len(limited.targets) == 1
def test_has_intercept(newsgroups_train):
    """has_intercept must reflect the classifier's fit_intercept setting."""
    docs, labels = newsgroups_train[0], newsgroups_train[1]
    vec = TfidfVectorizer()
    features = vec.fit_transform(docs)

    with_bias = LogisticRegression()
    with_bias.fit(features, labels)
    assert has_intercept(with_bias)

    without_bias = LogisticRegression(fit_intercept=False)
    without_bias.fit(features, labels)
    assert not has_intercept(without_bias)
def assert_linear_regression_explained(boston_train, reg, explain_prediction):
    """Fit ``reg`` on the boston data and check its prediction explanation."""
    X, y, feature_names = boston_train
    reg.fit(X, y)
    explanation = explain_prediction(reg, X[0])
    text, html = format_as_all(explanation, reg)

    assert len(explanation.targets) == 1
    target = explanation.targets[0]
    assert target.target == 'y'
    positive = get_all_features(target.feature_weights.pos)
    negative = get_all_features(target.feature_weights.neg)
    assert 'x11' in positive or 'x11' in negative

    if has_intercept(reg):
        assert '<BIAS>' in positive or '<BIAS>' in negative
        assert '<BIAS>' in text
        assert '<BIAS>' in html
    else:
        # No intercept: the bias pseudo-feature must be absent everywhere.
        assert '<BIAS>' not in positive and '<BIAS>' not in negative
        assert '<BIAS>' not in text
        assert 'BIAS' not in html

    for rendered in (text, html):
        assert 'x11' in rendered
        assert '(score' in rendered
    assert "'y'" in text
    assert '<b>y</b>' in strip_blanks(html)

    # Explanations must be deterministic across repeated calls.
    assert explanation == explain_prediction(reg, X[0])
def explain_prediction_linear_classifier(clf, doc, vec=None, top=None,
                                         target_names=None, targets=None,
                                         feature_names=None, vectorized=False):
    """ Explain prediction of a linear classifier. """
    vec, feature_names = _handle_vec(clf, doc, vec, vectorized, feature_names)
    X = _get_X(doc, vec=vec, vectorized=vectorized)

    # predict_proba may be advertised yet unsupported for some configurations.
    proba = None
    if is_probabilistic_classifier(clf):
        try:
            proba, = clf.predict_proba(X)
        except NotImplementedError:
            proba = None
    score, = clf.decision_function(X)

    if has_intercept(clf):
        X = _add_intercept(X)
    x, = X

    res = Explanation(
        estimator=repr(clf),
        method='linear model',
        targets=[],
    )

    def _weights(label_id):
        # Per-feature contribution: elementwise product of input and coef.
        coef = get_coef(clf, label_id)
        return get_top_features(feature_names, _multiply(x, coef), top)

    display_names = get_display_names(clf.classes_, target_names, targets)

    if is_multiclass_classifier(clf):
        for label_id, label in display_names:
            target_expl = TargetExplanation(
                target=label,
                feature_weights=_weights(label_id),
                score=score[label_id],
                proba=proba[label_id] if proba is not None else None,
            )
            _add_weighted_spans(doc, vec, target_expl)
            res.targets.append(target_expl)
    else:
        # Binary case: a single target describing the positive class.
        target_expl = TargetExplanation(
            target=display_names[1][1],
            feature_weights=_weights(0),
            score=score,
            proba=proba[1] if proba is not None else None,
        )
        _add_weighted_spans(doc, vec, target_expl)
        res.targets.append(target_expl)

    return res
def test_explain_prediction_pandas(reg, boston_train):
    """Explanations should work when the estimator is fit on a DataFrame."""
    pd = pytest.importorskip('pandas')
    X, y, feature_names = boston_train
    frame = pd.DataFrame(X, columns=feature_names)
    reg.fit(frame, y)
    explanation = explain_prediction(reg, frame.iloc[0])
    for rendered in format_as_all(explanation, reg):
        assert 'PTRATIO' in rendered
        if has_intercept(reg):
            assert 'BIAS' in rendered
def assert_linear_regression_explained(boston_train, reg, explain_prediction,
                                       atol=1e-8, reg_has_intercept=None):
    """Fit ``reg`` on the boston data and validate its prediction explanation,
    including named features, bias handling, determinism and feature filtering.
    """
    X, y, feature_names = boston_train
    reg.fit(X, y)
    explanation = explain_prediction(reg, X[0], feature_names=feature_names)
    text, html = rendered_all = format_as_all(explanation, reg)

    assert len(explanation.targets) == 1
    target = explanation.targets[0]
    assert target.target == 'y'

    def split_pos_neg(fw):
        # Features with weights attached, separated by contribution sign.
        return (get_all_features(fw.pos, with_weights=True),
                get_all_features(fw.neg, with_weights=True))

    pos, neg = split_pos_neg(target.feature_weights)
    assert 'LSTAT' in pos or 'LSTAT' in neg

    if reg_has_intercept is None:
        reg_has_intercept = has_intercept(reg)
    if reg_has_intercept:
        assert '<BIAS>' in pos or '<BIAS>' in neg
        assert '<BIAS>' in text
        assert '<BIAS>' in html
    else:
        assert '<BIAS>' not in pos and '<BIAS>' not in neg
        assert '<BIAS>' not in text
        assert 'BIAS' not in html

    for rendered in (text, html):
        assert 'LSTAT' in rendered
        assert '(score' in rendered
    assert "'y'" in text
    assert '<b>y</b>' in strip_blanks(html)
    for rendered in rendered_all:
        assert_feature_values_present(rendered, feature_names, X[0])

    # Explanations must be deterministic across repeated calls.
    assert explanation == explain_prediction(reg, X[0],
                                             feature_names=feature_names)
    check_targets_scores(explanation, atol=atol)

    # Filtering out LSTAT must drop exactly that feature and nothing else.
    filtered = explain_prediction(
        reg, X[0], feature_names=feature_names,
        feature_filter=lambda name, v: name != 'LSTAT')
    format_as_all(filtered, reg)
    flt_pos, flt_neg = split_pos_neg(filtered.targets[0].feature_weights)
    assert 'LSTAT' not in flt_pos and 'LSTAT' not in flt_neg
    remaining = dict(flt_pos, **flt_neg)
    expected = dict(pos, **neg)
    expected.pop('LSTAT')
    assert remaining == expected
def explain_prediction_linear_regressor(reg, doc, vec=None, top=None,
                                        target_names=None, targets=None,
                                        feature_names=None, vectorized=False):
    """ Explain prediction of a linear regressor. """
    vec, feature_names = _handle_vec(reg, doc, vec, vectorized, feature_names)
    X = _get_X(doc, vec=vec, vectorized=vectorized)

    # Predict first; the bias column is only appended for weight attribution.
    score, = reg.predict(X)
    if has_intercept(reg):
        X = _add_intercept(X)
    x, = X

    res = Explanation(
        estimator=repr(reg),
        method='linear model',
        targets=[],
        is_regression=True,
    )

    def _weights(label_id):
        # Per-feature contribution: elementwise product of input and coef.
        coef = get_coef(reg, label_id)
        return get_top_features(feature_names, _multiply(x, coef), top)

    display_names = get_display_names(
        get_default_target_names(reg), target_names, targets)

    if is_multitarget_regressor(reg):
        for label_id, label in display_names:
            target_expl = TargetExplanation(
                target=label,
                feature_weights=_weights(label_id),
                score=score[label_id],
            )
            _add_weighted_spans(doc, vec, target_expl)
            res.targets.append(target_expl)
    else:
        target_expl = TargetExplanation(
            target=display_names[0][1],
            feature_weights=_weights(0),
            score=score,
        )
        _add_weighted_spans(doc, vec, target_expl)
        res.targets.append(target_expl)

    return res
def test_explain_linear_regression_one_feature(reg):
    """A single-feature regressor still yields a formattable explanation."""
    xs, ys = make_regression(n_samples=10, n_features=1, bias=7.5,
                             random_state=42)
    reg.fit(xs, ys)
    explanation = explain_weights(reg)
    text, html = format_as_all(explanation, reg)
    for rendered in (text, html):
        assert 'x0' in rendered
    if has_intercept(reg):
        assert '<BIAS>' in text
        assert '<BIAS>' in html
def explain_prediction_linear_regressor(reg, doc,
                                        vec=None,
                                        top=None,
                                        top_targets=None,
                                        target_names=None,
                                        targets=None,
                                        feature_names=None,
                                        feature_re=None,
                                        feature_filter=None,
                                        vectorized=False):
    """
    Explain prediction of a linear regressor.

    See :func:`eli5.explain_prediction` for description of
    ``top``, ``top_targets``, ``target_names``, ``targets``,
    ``feature_names``, ``feature_re`` and ``feature_filter`` parameters.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the classifier ``clf``;
    you can pass it instead of ``feature_names``.

    ``vectorized`` is a flag which tells eli5 if ``doc`` should be
    passed through ``vec`` or not. By default it is False, meaning that
    if ``vec`` is not None, ``vec.transform([doc])`` is passed to the
    regressor ``reg``. Set it to True if you're passing ``vec``,
    but ``doc`` is already vectorized.
    """
    # Kernelized SVR explanations are only meaningful for a linear kernel.
    if isinstance(reg, (SVR, NuSVR)) and reg.kernel != 'linear':
        return explain_prediction_sklearn_not_supported(reg, doc)

    vec, feature_names = handle_vec(reg, doc, vec, vectorized, feature_names)
    X = get_X(doc, vec=vec, vectorized=vectorized, to_dense=True)

    # Predict first; the bias column is only appended for weight attribution.
    score, = reg.predict(X)
    if has_intercept(reg):
        X = add_intercept(X)
    x = get_X0(X)

    feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re, x)

    res = Explanation(
        estimator=repr(reg),
        method='linear model',
        targets=[],
        is_regression=True,
    )
    assert res.targets is not None

    _weights = _linear_weights(reg, x, top, feature_names, flt_indices)
    display_names = get_target_display_names(
        get_default_target_names(reg), target_names, targets,
        top_targets, score)

    if is_multitarget_regressor(reg):
        for label_id, label in display_names:
            target_expl = TargetExplanation(
                target=label,
                feature_weights=_weights(label_id),
                score=score[label_id],
            )
            add_weighted_spans(doc, vec, vectorized, target_expl)
            res.targets.append(target_expl)
    else:
        target_expl = TargetExplanation(
            target=display_names[0][1],
            feature_weights=_weights(0),
            score=score,
        )
        add_weighted_spans(doc, vec, vectorized, target_expl)
        res.targets.append(target_expl)

    return res
def explain_prediction_linear_classifier(
        clf, doc,
        vec=None,
        top=None,
        top_targets=None,
        target_names=None,
        targets=None,
        feature_names=None,
        feature_re=None,
        feature_filter=None,
        vectorized=False,
):
    """
    Explain prediction of a linear classifier.

    See :func:`eli5.explain_prediction` for description of
    ``top``, ``top_targets``, ``target_names``, ``targets``,
    ``feature_names``, ``feature_re`` and ``feature_filter`` parameters.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the classifier ``clf``
    (e.g. a fitted CountVectorizer instance); you can pass it
    instead of ``feature_names``.

    ``vectorized`` is a flag which tells eli5 if ``doc`` should be
    passed through ``vec`` or not. By default it is False, meaning that
    if ``vec`` is not None, ``vec.transform([doc])`` is passed to the
    classifier. Set it to True if you're passing ``vec``,
    but ``doc`` is already vectorized.
    """
    vec, feature_names = handle_vec(clf, doc, vec, vectorized, feature_names)
    X = get_X(doc, vec=vec, vectorized=vectorized, to_dense=True)

    proba = predict_proba(clf, X)
    score, = clf.decision_function(X)

    # The bias column is appended only after scoring, for weight attribution.
    if has_intercept(clf):
        X = add_intercept(X)
    x = get_X0(X)

    feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re, x)

    res = Explanation(
        estimator=repr(clf),
        method='linear model',
        targets=[],
    )
    assert res.targets is not None

    _weights = _linear_weights(clf, x, top, feature_names, flt_indices)
    classes = getattr(clf, "classes_", ["-1", "1"])  # OneClassSVM support
    display_names = get_target_display_names(
        classes, target_names, targets, top_targets, score)

    if is_multiclass_classifier(clf):
        for label_id, label in display_names:
            target_expl = TargetExplanation(
                target=label,
                feature_weights=_weights(label_id),
                score=score[label_id],
                proba=proba[label_id] if proba is not None else None,
            )
            add_weighted_spans(doc, vec, vectorized, target_expl)
            res.targets.append(target_expl)
    else:
        if len(display_names) == 1:  # target is passed explicitly
            label_id, target = display_names[0]
        else:
            # Pick the class implied by the sign of the decision score.
            label_id = 1 if score >= 0 else 0
            target = display_names[label_id][1]
        # Weights are flipped when explaining the negative class.
        scale = -1 if label_id == 0 else 1
        target_expl = TargetExplanation(
            target=target,
            feature_weights=_weights(0, scale=scale),
            score=score,
            proba=proba[label_id] if proba is not None else None,
        )
        add_weighted_spans(doc, vec, vectorized, target_expl)
        res.targets.append(target_expl)

    return res
def test_has_intercept(newsgroups_train, clf, intercept):
    """has_intercept must match the expected flag for each fitted classifier."""
    vec = TfidfVectorizer()
    features = vec.fit_transform(newsgroups_train[0])
    clf.fit(features, newsgroups_train[1])
    assert has_intercept(clf) == intercept