Пример #1
0
def get_decision_path_explanation(estimator, doc, vec, vectorized,
                                  x, feature_names,
                                  feature_filter, feature_re, top,
                                  original_display_names,
                                  target_names, targets, top_targets,
                                  is_regression, is_multiclass, proba,
                                  get_score_weights):
    # type: (...) -> Explanation

    display_names = get_target_display_names(
        original_display_names, target_names, targets, top_targets, proba)
    flt_feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re, x)

    def get_top_features(weights, scale=1.0):
        return get_top_features_filtered(x, flt_feature_names, flt_indices,
                                         weights, top, scale)

    explanation = Explanation(
        estimator=repr(estimator),
        method='decision paths',
        description={
            (False, False): DESCRIPTION_CLF_BINARY,
            (False, True): DESCRIPTION_CLF_MULTICLASS,
            (True, False): DESCRIPTION_REGRESSION,
        }[is_regression, is_multiclass],
        is_regression=is_regression,
        targets=[],
    )
    assert explanation.targets is not None

    if is_multiclass:
        for label_id, label in display_names:
            score, all_feature_weights = get_score_weights(label_id)
            target_expl = TargetExplanation(
                target=label,
                feature_weights=get_top_features(all_feature_weights),
                score=score,
                proba=proba[label_id] if proba is not None else None,
            )
            add_weighted_spans(doc, vec, vectorized, target_expl)
            explanation.targets.append(target_expl)
    else:
        score, all_feature_weights = get_score_weights(0)
        if is_regression:
            target, scale, label_id = display_names[-1][1], 1.0, 1
        else:
            target, scale, label_id = get_binary_target_scale_label_id(
                score, display_names, proba)

        target_expl = TargetExplanation(
            target=target,
            feature_weights=get_top_features(all_feature_weights, scale),
            score=score,
            proba=proba[label_id] if proba is not None else None,
        )
        add_weighted_spans(doc, vec, vectorized, target_expl)
        explanation.targets.append(target_expl)

    return explanation
Пример #2
0
def explain_prediction_tree_classifier(clf,
                                       doc,
                                       vec=None,
                                       top=None,
                                       top_targets=None,
                                       target_names=None,
                                       targets=None,
                                       feature_names=None,
                                       feature_re=None,
                                       feature_filter=None,
                                       vectorized=False):
    """ Explain prediction of a tree classifier.

    See :func:`eli5.explain_prediction` for description of
    ``top``, ``top_targets``, ``target_names``, ``targets``,
    ``feature_names``, ``feature_re`` and ``feature_filter`` parameters.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the classifier ``clf``
    (e.g. a fitted CountVectorizer instance); you can pass it
    instead of ``feature_names``.

    ``vectorized`` is a flag which tells eli5 if ``doc`` should be
    passed through ``vec`` or not. By default it is False, meaning that
    if ``vec`` is not None, ``vec.transform([doc])`` is passed to the
    classifier. Set it to True if you're passing ``vec``,
    but ``doc`` is already vectorized.

    Method for determining feature importances follows an idea from
    http://blog.datadive.net/interpreting-random-forests/.
    Feature weights are calculated by following decision paths in trees
    of an ensemble (or a single tree for DecisionTreeClassifier).
    Each node of the tree has an output score, and contribution of a feature
    on the decision path is how much the score changes from parent to child.
    Weights of all features sum to the output score or proba of the estimator.
    """
    vec, feature_names = handle_vec(clf, doc, vec, vectorized, feature_names)
    X = get_X(doc, vec=vec, vectorized=vectorized)
    if feature_names.bias_name is None:
        # Tree estimators do not have an intercept, but here we interpret
        # them as having an intercept
        feature_names.bias_name = '<BIAS>'

    proba = predict_proba(clf, X)
    if hasattr(clf, 'decision_function'):
        score, = clf.decision_function(X)
    else:
        score = None

    is_multiclass = clf.n_classes_ > 2
    feature_weights = _trees_feature_weights(clf, X, feature_names,
                                             clf.n_classes_)
    x = get_X0(add_intercept(X))
    flt_feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re, x)

    def _weights(label_id, scale=1.0):
        weights = feature_weights[:, label_id]
        return get_top_features_filtered(x, flt_feature_names, flt_indices,
                                         weights, top, scale)

    res = Explanation(
        estimator=repr(clf),
        method='decision path',
        targets=[],
        description=(DESCRIPTION_TREE_CLF_MULTICLASS
                     if is_multiclass else DESCRIPTION_TREE_CLF_BINARY),
    )
    assert res.targets is not None

    display_names = get_target_display_names(
        clf.classes_,
        target_names,
        targets,
        top_targets,
        score=score if score is not None else proba)

    if is_multiclass:
        for label_id, label in display_names:
            target_expl = TargetExplanation(
                target=label,
                feature_weights=_weights(label_id),
                score=score[label_id] if score is not None else None,
                proba=proba[label_id] if proba is not None else None,
            )
            add_weighted_spans(doc, vec, vectorized, target_expl)
            res.targets.append(target_expl)
    else:
        target, scale, label_id = get_binary_target_scale_label_id(
            score, display_names, proba)
        target_expl = TargetExplanation(
            target=target,
            feature_weights=_weights(label_id, scale=scale),
            score=score if score is not None else None,
            proba=proba[label_id] if proba is not None else None,
        )
        add_weighted_spans(doc, vec, vectorized, target_expl)
        res.targets.append(target_expl)

    return res