def assert_multitarget_linear_regression_explained(reg, explain_prediction):
    X, y = make_regression(n_samples=100, n_targets=3, n_features=10,
                           random_state=42)
    reg.fit(X, y)
    res = explain_prediction(reg, X[0])
    expl_text, expl_html = format_as_all(res, reg)

    assert len(res.targets) == 3
    target = res.targets[1]
    assert target.target == 'y1'
    pos, neg = (get_all_features(target.feature_weights.pos),
                get_all_features(target.feature_weights.neg))
    assert 'x8' in pos or 'x8' in neg
    if has_intercept(reg):
        assert '<BIAS>' in pos or '<BIAS>' in neg

    assert 'x8' in expl_text
    if has_intercept(reg):
        assert '<BIAS>' in expl_text
    assert "'y2'" in expl_text

    assert res == explain_prediction(reg, X[0])
    check_targets_scores(res)

    top_targets_res = explain_prediction(reg, X[0], top_targets=1)
    assert len(top_targets_res.targets) == 1
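
The helper above expects a multi-target regressor to yield one explanation entry per output, named 'y0', 'y1', and so on. A minimal, hedged sketch of that behaviour through the public API (Ridge and the top-level eli5.explain_prediction call are assumptions, not part of the original test):

from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge
from eli5 import explain_prediction

X, y = make_regression(n_samples=100, n_targets=3, n_features=10, random_state=42)
reg = Ridge().fit(X, y)
res = explain_prediction(reg, X[0])
print([t.target for t in res.targets])   # expected: ['y0', 'y1', 'y2']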
Example #2
def test_has_intercept(newsgroups_train):
    vec = TfidfVectorizer()
    X = vec.fit_transform(newsgroups_train[0])
    clf = LogisticRegression()
    clf.fit(X, newsgroups_train[1])
    assert has_intercept(clf)

    clf2 = LogisticRegression(fit_intercept=False)
    clf2.fit(X, newsgroups_train[1])
    assert not has_intercept(clf2)
Example #3
def assert_linear_regression_explained(boston_train, reg, explain_prediction):
    X, y, feature_names = boston_train
    reg.fit(X, y)
    res = explain_prediction(reg, X[0])
    expl_text, expl_html = format_as_all(res, reg)

    assert len(res.targets) == 1
    target = res.targets[0]
    assert target.target == 'y'
    pos, neg = (get_all_features(target.feature_weights.pos),
                get_all_features(target.feature_weights.neg))
    assert 'x11' in pos or 'x11' in neg

    if has_intercept(reg):
        assert '<BIAS>' in pos or '<BIAS>' in neg
        assert '<BIAS>' in expl_text
        assert '&lt;BIAS&gt;' in expl_html
    else:
        assert '<BIAS>' not in pos and '<BIAS>' not in neg
        assert '<BIAS>' not in expl_text
        assert 'BIAS' not in expl_html

    for expl in [expl_text, expl_html]:
        assert 'x11' in expl
        assert '(score' in expl
    assert "'y'" in expl_text
    assert '<b>y</b>' in strip_blanks(expl_html)

    assert res == explain_prediction(reg, X[0])
Example #4
def explain_prediction_linear_classifier(clf,
                                         doc,
                                         vec=None,
                                         top=None,
                                         target_names=None,
                                         targets=None,
                                         feature_names=None,
                                         vectorized=False):
    """ Explain prediction of a linear classifier. """
    vec, feature_names = _handle_vec(clf, doc, vec, vectorized, feature_names)
    X = _get_X(doc, vec=vec, vectorized=vectorized)

    if is_probabilistic_classifier(clf):
        try:
            proba, = clf.predict_proba(X)
        except NotImplementedError:
            proba = None
    else:
        proba = None
    score, = clf.decision_function(X)

    if has_intercept(clf):
        X = _add_intercept(X)
    x, = X

    res = Explanation(
        estimator=repr(clf),
        method='linear model',
        targets=[],
    )

    def _weights(label_id):
        coef = get_coef(clf, label_id)
        scores = _multiply(x, coef)
        return get_top_features(feature_names, scores, top)

    display_names = get_display_names(clf.classes_, target_names, targets)

    if is_multiclass_classifier(clf):
        for label_id, label in display_names:
            target_expl = TargetExplanation(
                target=label,
                feature_weights=_weights(label_id),
                score=score[label_id],
                proba=proba[label_id] if proba is not None else None,
            )
            _add_weighted_spans(doc, vec, target_expl)
            res.targets.append(target_expl)
    else:
        # binary case: a single explanation for the positive class
        target_expl = TargetExplanation(
            target=display_names[1][1],
            feature_weights=_weights(0),
            score=score,
            proba=proba[1] if proba is not None else None,
        )
        _add_weighted_spans(doc, vec, target_expl)
        res.targets.append(target_expl)

    return res
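
A minimal usage sketch for the function above (hedged: the toy corpus and the CountVectorizer/LogisticRegression choices are illustrations, and the private helpers it calls, such as _handle_vec and _get_X, must be in scope for it to run):

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression

docs = ['good movie', 'bad movie', 'great film', 'awful film']
labels = [1, 0, 1, 0]
vec = CountVectorizer()
clf = LogisticRegression().fit(vec.fit_transform(docs), labels)

# binary case: the function returns a single TargetExplanation for the
# positive class, carrying its decision score and probability
expl = explain_prediction_linear_classifier(clf, 'a great movie', vec=vec, top=5)
for target in expl.targets:
    print(target.target, target.score, target.proba)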
Example #5
def test_explain_prediction_pandas(reg, boston_train):
    pd = pytest.importorskip('pandas')
    X, y, feature_names = boston_train
    df = pd.DataFrame(X, columns=feature_names)
    reg.fit(df, y)
    res = explain_prediction(reg, df.iloc[0])
    for expl in format_as_all(res, reg):
        assert 'PTRATIO' in expl
        if has_intercept(reg):
            assert 'BIAS' in expl
Example #6
def assert_linear_regression_explained(boston_train,
                                       reg,
                                       explain_prediction,
                                       atol=1e-8,
                                       reg_has_intercept=None):
    X, y, feature_names = boston_train
    reg.fit(X, y)
    res = explain_prediction(reg, X[0], feature_names=feature_names)
    expl_text, expl_html = expls = format_as_all(res, reg)

    assert len(res.targets) == 1
    target = res.targets[0]
    assert target.target == 'y'
    get_pos_neg_features = lambda fw: (
        get_all_features(fw.pos, with_weights=True),
        get_all_features(fw.neg, with_weights=True))
    pos, neg = get_pos_neg_features(target.feature_weights)
    assert 'LSTAT' in pos or 'LSTAT' in neg

    if reg_has_intercept is None:
        reg_has_intercept = has_intercept(reg)
    if reg_has_intercept:
        assert '<BIAS>' in pos or '<BIAS>' in neg
        assert '<BIAS>' in expl_text
        assert '&lt;BIAS&gt;' in expl_html
    else:
        assert '<BIAS>' not in pos and '<BIAS>' not in neg
        assert '<BIAS>' not in expl_text
        assert 'BIAS' not in expl_html

    for expl in [expl_text, expl_html]:
        assert 'LSTAT' in expl
        assert '(score' in expl
    assert "'y'" in expl_text
    assert '<b>y</b>' in strip_blanks(expl_html)

    for expl in expls:
        assert_feature_values_present(expl, feature_names, X[0])

    assert res == explain_prediction(reg, X[0], feature_names=feature_names)
    check_targets_scores(res, atol=atol)

    flt_res = explain_prediction(
        reg,
        X[0],
        feature_names=feature_names,
        feature_filter=lambda name, v: name != 'LSTAT')
    format_as_all(flt_res, reg)
    flt_target = flt_res.targets[0]
    flt_pos, flt_neg = get_pos_neg_features(flt_target.feature_weights)
    assert 'LSTAT' not in flt_pos and 'LSTAT' not in flt_neg
    flt_all = dict(flt_pos, **flt_neg)
    expected = dict(pos, **neg)
    expected.pop('LSTAT')
    assert flt_all == expected
Example #7
def explain_prediction_linear_regressor(reg,
                                        doc,
                                        vec=None,
                                        top=None,
                                        target_names=None,
                                        targets=None,
                                        feature_names=None,
                                        vectorized=False):
    """ Explain prediction of a linear regressor. """
    vec, feature_names = _handle_vec(reg, doc, vec, vectorized, feature_names)
    X = _get_X(doc, vec=vec, vectorized=vectorized)

    score, = reg.predict(X)

    if has_intercept(reg):
        X = _add_intercept(X)
    x, = X

    res = Explanation(
        estimator=repr(reg),
        method='linear model',
        targets=[],
        is_regression=True,
    )

    def _weights(label_id):
        coef = get_coef(reg, label_id)
        scores = _multiply(x, coef)
        return get_top_features(feature_names, scores, top)

    names = get_default_target_names(reg)
    display_names = get_display_names(names, target_names, targets)

    if is_multitarget_regressor(reg):
        for label_id, label in display_names:
            target_expl = TargetExplanation(
                target=label,
                feature_weights=_weights(label_id),
                score=score[label_id],
            )
            _add_weighted_spans(doc, vec, target_expl)
            res.targets.append(target_expl)
    else:
        target_expl = TargetExplanation(
            target=display_names[0][1],
            feature_weights=_weights(0),
            score=score,
        )
        _add_weighted_spans(doc, vec, target_expl)
        res.targets.append(target_expl)

    return res
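
A matching sketch for the regressor variant above, also hedged (make_regression and LinearRegression are illustration choices; the same private helpers must be importable):

from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression

X, y = make_regression(n_samples=50, n_features=4, random_state=0)
reg = LinearRegression().fit(X, y)

# doc is a single, already numeric sample, so no vectorizer is passed
expl = explain_prediction_linear_regressor(reg, X[0])
target = expl.targets[0]
print(target.target, target.score)
print(target.feature_weights.pos)   # features pushing the prediction up
print(target.feature_weights.neg)   # features pushing it down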
Example #8
def test_explain_linear_regression_one_feature(reg):
    xs, ys = make_regression(n_samples=10, n_features=1, bias=7.5,
                             random_state=42)
    reg.fit(xs, ys)
    res = explain_weights(reg)
    expl_text, expl_html = format_as_all(res, reg)

    for expl in [expl_text, expl_html]:
        assert 'x0' in expl

    if has_intercept(reg):
        assert '<BIAS>' in expl_text
        assert '&lt;BIAS&gt;' in expl_html
Example #9
def explain_prediction_linear_regressor(reg,
                                        doc,
                                        vec=None,
                                        top=None,
                                        top_targets=None,
                                        target_names=None,
                                        targets=None,
                                        feature_names=None,
                                        feature_re=None,
                                        feature_filter=None,
                                        vectorized=False):
    """
    Explain prediction of a linear regressor.

    See :func:`eli5.explain_prediction` for description of
    ``top``, ``top_targets``, ``target_names``, ``targets``,
    ``feature_names``, ``feature_re`` and ``feature_filter`` parameters.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the regressor ``reg``;
    you can pass it instead of ``feature_names``.

    ``vectorized`` is a flag which tells eli5 if ``doc`` should be
    passed through ``vec`` or not. By default it is False, meaning that
    if ``vec`` is not None, ``vec.transform([doc])`` is passed to the
    regressor ``reg``. Set it to True if you're passing ``vec``,
    but ``doc`` is already vectorized.
    """
    if isinstance(reg, (SVR, NuSVR)) and reg.kernel != 'linear':
        return explain_prediction_sklearn_not_supported(reg, doc)

    vec, feature_names = handle_vec(reg, doc, vec, vectorized, feature_names)
    X = get_X(doc, vec=vec, vectorized=vectorized, to_dense=True)

    score, = reg.predict(X)

    if has_intercept(reg):
        X = add_intercept(X)
    x = get_X0(X)

    feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re, x)

    res = Explanation(
        estimator=repr(reg),
        method='linear model',
        targets=[],
        is_regression=True,
    )
    assert res.targets is not None

    _weights = _linear_weights(reg, x, top, feature_names, flt_indices)
    names = get_default_target_names(reg)
    display_names = get_target_display_names(names, target_names, targets,
                                             top_targets, score)

    if is_multitarget_regressor(reg):
        for label_id, label in display_names:
            target_expl = TargetExplanation(
                target=label,
                feature_weights=_weights(label_id),
                score=score[label_id],
            )
            add_weighted_spans(doc, vec, vectorized, target_expl)
            res.targets.append(target_expl)
    else:
        target_expl = TargetExplanation(
            target=display_names[0][1],
            feature_weights=_weights(0),
            score=score,
        )
        add_weighted_spans(doc, vec, vectorized, target_expl)
        res.targets.append(target_expl)

    return res
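
The docstring above describes feature_names, feature_re and feature_filter; a hedged sketch of combining them (the dataset, the name list and the filter are invented for illustration):

from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression

X, y = make_regression(n_samples=50, n_features=4, random_state=0)
reg = LinearRegression().fit(X, y)
names = ['f0', 'f1', 'f2', 'f3']   # hypothetical feature names

# hide 'f3' from the explanation; the filter receives (name, value) pairs
expl = explain_prediction_linear_regressor(
    reg, X[0],
    feature_names=names,
    feature_filter=lambda name, value: name != 'f3',
)
target = expl.targets[0]
print(target.target, target.score)
print(target.feature_weights.pos, target.feature_weights.neg)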
Example #10
def explain_prediction_linear_classifier(
    clf,
    doc,
    vec=None,
    top=None,
    top_targets=None,
    target_names=None,
    targets=None,
    feature_names=None,
    feature_re=None,
    feature_filter=None,
    vectorized=False,
):
    """
    Explain prediction of a linear classifier.

    See :func:`eli5.explain_prediction` for description of
    ``top``, ``top_targets``, ``target_names``, ``targets``,
    ``feature_names``, ``feature_re`` and ``feature_filter`` parameters.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the classifier ``clf``
    (e.g. a fitted CountVectorizer instance); you can pass it
    instead of ``feature_names``.

    ``vectorized`` is a flag which tells eli5 if ``doc`` should be
    passed through ``vec`` or not. By default it is False, meaning that
    if ``vec`` is not None, ``vec.transform([doc])`` is passed to the
    classifier. Set it to True if you're passing ``vec``, but ``doc``
    is already vectorized.
    """
    vec, feature_names = handle_vec(clf, doc, vec, vectorized, feature_names)
    X = get_X(doc, vec=vec, vectorized=vectorized, to_dense=True)

    proba = predict_proba(clf, X)
    score, = clf.decision_function(X)

    if has_intercept(clf):
        X = add_intercept(X)
    x = get_X0(X)

    feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re, x)

    res = Explanation(
        estimator=repr(clf),
        method='linear model',
        targets=[],
    )
    assert res.targets is not None

    _weights = _linear_weights(clf, x, top, feature_names, flt_indices)
    classes = getattr(clf, "classes_", ["-1", "1"])  # OneClassSVM support
    display_names = get_target_display_names(classes, target_names, targets,
                                             top_targets, score)

    if is_multiclass_classifier(clf):
        for label_id, label in display_names:
            target_expl = TargetExplanation(
                target=label,
                feature_weights=_weights(label_id),
                score=score[label_id],
                proba=proba[label_id] if proba is not None else None,
            )
            add_weighted_spans(doc, vec, vectorized, target_expl)
            res.targets.append(target_expl)
    else:
        if len(display_names) == 1:  # target is passed explicitly
            label_id, target = display_names[0]
        else:
            label_id = 1 if score >= 0 else 0
            target = display_names[label_id][1]
        # flip the sign so the weights explain the chosen (negative) class
        scale = -1 if label_id == 0 else 1

        target_expl = TargetExplanation(
            target=target,
            feature_weights=_weights(0, scale=scale),
            score=score,
            proba=proba[label_id] if proba is not None else None,
        )
        add_weighted_spans(doc, vec, vectorized, target_expl)
        res.targets.append(target_expl)

    return res
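
A hedged multiclass sketch for the classifier variant above, exercising the top and top_targets parameters from the docstring (the toy corpus and the TfidfVectorizer plus LogisticRegression choices are assumptions):

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

docs = ['cats purr and meow', 'dogs bark and growl', 'birds sing and chirp',
        'my cat meows', 'my dog barks', 'my bird chirps']
labels = ['cat', 'dog', 'bird', 'cat', 'dog', 'bird']
vec = TfidfVectorizer()
clf = LogisticRegression().fit(vec.fit_transform(docs), labels)

# top=5 limits the listed features; top_targets=1 keeps a single class
expl = explain_prediction_linear_classifier(
    clf, 'the dog barks', vec=vec, top=5, top_targets=1)
best = expl.targets[0]
print(best.target, best.proba, best.score)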
Example #11
def test_has_intercept(newsgroups_train, clf, intercept):
    vec = TfidfVectorizer()
    X = vec.fit_transform(newsgroups_train[0])
    clf.fit(X, newsgroups_train[1])
    assert has_intercept(clf) == intercept