Example #1
def odd_feature_names(transformer, in_names=None):
    if in_names is None:
        from eli5.sklearn.utils import get_feature_names
        # generate default feature names
        in_names = get_feature_names(transformer, num_features=transformer.n_features_)
    # return a list of strings derived from in_names
    return in_names[1::2]
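A quick sanity check of the slicing (a sketch; the transformer argument is never touched when in_names is passed explicitly):

names = odd_feature_names(transformer=None, in_names=['a', 'b', 'c', 'd'])
assert names == ['b', 'd']  # every second name, starting from index 1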
Example #2
def _col_tfm_names(transformer, in_names=None):
    import numpy as np  # needed below for the index-based fallback names
    if in_names is None:
        from eli5.sklearn.utils import get_feature_names
        # generate default feature names
        in_names = get_feature_names(transformer,
                                     num_features=transformer._n_features)
    # return a list of strings derived from in_names
    feature_names = []
    for name, trans, column, _ in transformer._iter(fitted=True):
        if hasattr(transformer, '_df_columns'):
            if ((not isinstance(column, slice))
                    and all(isinstance(col, str) for col in column)):
                names = column
            else:
                names = transformer._df_columns[column]
        else:
            indices = np.arange(transformer._n_features)
            names = ['x%d' % i for i in indices[column]]
        # TODO: consider allowing in_names to override these generated names

        if trans == 'drop' or (hasattr(column, '__len__') and not len(column)):
            continue
        if trans == 'passthrough':
            feature_names.extend(names)
            continue
        feature_names.extend([
            name + "__" + f
            for f in transform_feature_names(trans, in_names=names)
        ])
    return feature_names
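For comparison, a sketch of the public scikit-learn API that produces the same "name__feature" naming for a fitted ColumnTransformer (assumes scikit-learn >= 1.0 for get_feature_names_out):

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

df = pd.DataFrame({'city': ['a', 'b'], 'age': [1, 2]})
ct = ColumnTransformer([('ohe', OneHotEncoder(), ['city'])],
                       remainder='passthrough')
ct.fit(df)
print(ct.get_feature_names_out())
# ['ohe__city_a' 'ohe__city_b' 'remainder__age']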
Example #3
def _handle_vec(clf, doc, vec, vectorized, feature_names):
    if isinstance(vec, HashingVectorizer) and not vectorized:
        vec = InvertableHashingVectorizer(vec)
        vec.fit([doc])
    if is_invhashing(vec) and feature_names is None:
        # Explaining predictions does not need coef_scale,
        # because it is handled by the vectorizer.
        feature_names = vec.get_feature_names(always_signed=False)
    feature_names = get_feature_names(clf, vec, feature_names=feature_names)
    return vec, feature_names
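A sketch of the wrapping idea used by _handle_vec: an InvertableHashingVectorizer (from eli5.sklearn) is fitted on the documents so that hashed buckets can be mapped back to readable terms:

from sklearn.feature_extraction.text import HashingVectorizer
from eli5.sklearn import InvertableHashingVectorizer

vec = HashingVectorizer(n_features=2 ** 10)
ivec = InvertableHashingVectorizer(vec)
ivec.fit(['hello world'])  # records which terms land in which hash buckets
names = ivec.get_feature_names(always_signed=False)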
Example #4
def explain_decision_tree(
        estimator,
        vec=None,
        top=_TOP,
        target_names=None,
        targets=None,  # ignored
        feature_names=None,
        feature_re=None,
        feature_filter=None,
        **export_graphviz_kwargs):
    """
    Return an explanation of a decision tree.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``target_names``, ``feature_names``,
    ``feature_re`` and ``feature_filter`` parameters.

    The ``targets`` parameter is ignored.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the estimator (e.g. a fitted
    CountVectorizer instance); you can pass it instead of ``feature_names``.

    All other keyword arguments are passed to
    `sklearn.tree.export_graphviz`_ function.

    .. _sklearn.tree.export_graphviz: http://scikit-learn.org/stable/modules/generated/sklearn.tree.export_graphviz.html
    """
    feature_names = get_feature_names(estimator,
                                      vec,
                                      feature_names=feature_names)
    coef = estimator.feature_importances_
    tree_feature_names = feature_names
    feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re)
    if flt_indices is not None:
        coef = coef[flt_indices]
    indices = argsort_k_largest_positive(coef, top)
    names, values = feature_names[indices], coef[indices]
    export_graphviz_kwargs.setdefault("proportion", True)
    tree_info = get_tree_info(estimator,
                              feature_names=tree_feature_names,
                              class_names=target_names,
                              **export_graphviz_kwargs)

    return Explanation(
        feature_importances=FeatureImportances(
            [FeatureWeight(*x) for x in zip(names, values)],
            remaining=np.count_nonzero(coef) - len(indices),
        ),
        decision_tree=tree_info,
        description=DESCRIPTION_DECISION_TREE,
        estimator=repr(estimator),
        method='decision tree',
    )
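A usage sketch: eli5.explain_weights dispatches to this function for fitted decision trees (the dataset here is illustrative):

import eli5
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)
tree = DecisionTreeClassifier(random_state=0).fit(X, y)
expl = eli5.explain_weights(tree, top=5)  # routed to explain_decision_tree
print(eli5.format_as_text(expl))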
Example #5
def explain_weights_pipeline(estimator, feature_names=None, **kwargs):
    last_estimator = estimator.steps[-1][1]
    transform_pipeline = Pipeline(estimator.steps[:-1])
    if 'vec' in kwargs:
        feature_names = get_feature_names(feature_names, vec=kwargs.pop('vec'))
    feature_names = transform_feature_names(transform_pipeline, feature_names)
    out = explain_weights(last_estimator,
                          feature_names=feature_names,
                          **kwargs)
    out.estimator = repr(estimator)
    return out
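A sketch of the pipeline path: feature names are pushed through the transformer steps before the final estimator is explained (toy data, for illustration only):

import eli5
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

pipe = make_pipeline(CountVectorizer(), LogisticRegression())
pipe.fit(['good movie', 'bad movie'], [1, 0])
expl = eli5.explain_weights(pipe)  # names come from the CountVectorizer step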
Example #6
def explain_weights_xgboost(xgb,
                            vec=None,
                            top=20,
                            target_names=None,  # ignored
                            targets=None,  # ignored
                            feature_names=None,
                            feature_re=None,
                            feature_filter=None,
                            importance_type='gain',
                            ):
    """
    Return an explanation of an XGBoost estimator (via scikit-learn wrapper
    XGBClassifier or XGBRegressor) as feature importances.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``feature_names``,
    ``feature_re`` and ``feature_filter`` parameters.

    The ``target_names`` and ``targets`` parameters are ignored.

    Parameters
    ----------
    importance_type : str, optional
        A way to get feature importance. Possible values are:

        - 'gain' - the average gain of the feature when it is used in trees
          (default)
        - 'weight' - the number of times a feature is used to split the data
          across all trees
        - 'cover' - the average coverage of the feature when it is used in trees
    """
    coef = _xgb_feature_importances(xgb, importance_type=importance_type)
    num_features = coef.shape[-1]
    feature_names = get_feature_names(
        xgb, vec, feature_names=feature_names, num_features=num_features)

    feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re)
    if flt_indices is not None:
        coef = coef[flt_indices]

    indices = argsort_k_largest_positive(coef, top)
    names, values = feature_names[indices], coef[indices]
    return Explanation(
        feature_importances=FeatureImportances(
            [FeatureWeight(*x) for x in zip(names, values)],
            remaining=np.count_nonzero(coef) - len(indices),
        ),
        description=DESCRIPTION_XGBOOST,
        estimator=repr(xgb),
        method='feature importances',
        is_regression=isinstance(xgb, XGBRegressor),
    )
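A hedged usage sketch (synthetic data; importance_type switches between the three modes described above):

import eli5
from sklearn.datasets import make_classification
from xgboost import XGBClassifier

X, y = make_classification(n_features=10, random_state=0)
clf = XGBClassifier(n_estimators=20).fit(X, y)
# 'weight' counts how often each feature is used for a split
expl = eli5.explain_weights(clf, importance_type='weight', top=5)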
Example #7
def explain_decision_tree(
        estimator,
        vec=None,
        top=_TOP,
        target_names=None,
        targets=None,  # ignored
        feature_names=None,
        feature_re=None,
        feature_filter=None,
        **export_graphviz_kwargs):
    """
    Return an explanation of a decision tree.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``target_names``, ``feature_names``,
    ``feature_re`` and ``feature_filter`` parameters.

    The ``targets`` parameter is ignored.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the estimator (e.g. a fitted
    CountVectorizer instance); you can pass it instead of ``feature_names``.

    All other keyword arguments are passed to
    `sklearn.tree.export_graphviz`_ function.

    .. _sklearn.tree.export_graphviz: http://scikit-learn.org/stable/modules/generated/sklearn.tree.export_graphviz.html
    """
    feature_names = get_feature_names(estimator,
                                      vec,
                                      feature_names=feature_names)
    tree_feature_names = feature_names
    feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re)
    feature_importances = get_feature_importances_filtered(
        estimator.feature_importances_, feature_names, flt_indices, top)

    export_graphviz_kwargs.setdefault("proportion", True)
    tree_info = get_tree_info(estimator,
                              feature_names=tree_feature_names,
                              class_names=target_names,
                              **export_graphviz_kwargs)

    return Explanation(
        feature_importances=feature_importances,
        decision_tree=tree_info,
        description=DESCRIPTION_DECISION_TREE,
        estimator=repr(estimator),
        method='decision tree',
    )
Example #8
def explain_decision_tree(
        clf,
        vec=None,
        top=_TOP,
        target_names=None,
        targets=None,  # ignored
        feature_names=None,
        feature_re=None,
        **export_graphviz_kwargs):
    """
    Return an explanation of a decision tree classifier in the
    following format (compatible with random forest explanations)::

        Explanation(
            estimator="<classifier repr>",
            method="<interpretation method>",
            description="<human readable description>",
            decision_tree={...tree information},
            feature_importances=[
                FeatureWeight(feature_name, importance, std_deviation),
                ...
            ]
        )

    """
    feature_names = get_feature_names(clf, vec, feature_names=feature_names)
    coef = clf.feature_importances_
    tree_feature_names = feature_names
    if feature_re is not None:
        feature_names, flt_indices = feature_names.filtered_by_re(feature_re)
        coef = coef[flt_indices]
    indices = argsort_k_largest(coef, top)
    names, values = feature_names[indices], coef[indices]
    std = np.zeros_like(values)
    export_graphviz_kwargs.setdefault("proportion", True)
    tree_info = get_tree_info(clf,
                              feature_names=tree_feature_names,
                              class_names=target_names,
                              **export_graphviz_kwargs)

    return Explanation(
        feature_importances=[
            FeatureWeight(*x) for x in zip(names, values, std)
        ],
        decision_tree=tree_info,
        description=DESCRIPTION_DECISION_TREE,
        estimator=repr(clf),
        method='decision tree',
    )
Example #9
def explain_rf_feature_importance(
    estimator,
    vec=None,
    top=_TOP,
    target_names=None,  # ignored
    targets=None,  # ignored
    feature_names=None,
    feature_re=None,
    feature_filter=None,
):
    """
    Return an explanation of a tree-based ensemble estimator.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``feature_names``, ``feature_re`` and ``feature_filter``
    parameters.

    The ``target_names`` and ``targets`` parameters are ignored.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the estimator (e.g. a fitted
    CountVectorizer instance); you can pass it instead of ``feature_names``.
    """
    feature_names = get_feature_names(estimator,
                                      vec,
                                      feature_names=feature_names)
    coef = estimator.feature_importances_
    trees = np.array(estimator.estimators_).ravel()
    coef_std = np.std([tree.feature_importances_ for tree in trees], axis=0)

    feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re)
    if flt_indices is not None:
        coef = coef[flt_indices]
        coef_std = coef_std[flt_indices]

    indices = argsort_k_largest_positive(coef, top)
    names, values, std = (feature_names[indices], coef[indices],
                          coef_std[indices])
    return Explanation(
        feature_importances=FeatureImportances(
            [FeatureWeight(*x) for x in zip(names, values, std)],
            remaining=np.count_nonzero(coef) - len(indices),
        ),
        description=DESCRIPTION_RANDOM_FOREST,
        estimator=repr(estimator),
        method='feature importances',
    )
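A usage sketch with synthetic data; the per-tree standard deviation computed above ends up in each FeatureWeight's std field:

import eli5
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

X, y = make_classification(n_features=8, random_state=0)
rf = RandomForestClassifier(n_estimators=50, random_state=0).fit(X, y)
expl = eli5.explain_weights(rf, top=5)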
Example #10
def test_get_feature_names():
    docs = ['hello world', 'hello', 'world']

    def _names(*args, **kwargs):
        return set(get_feature_names(*args, **kwargs))

    for y in [[0, 1, 2], [0, 1, 0]]:  # multiclass, binary
        vec = CountVectorizer()
        X = vec.fit_transform(docs)

        clf = LogisticRegression()
        clf.fit(X, y)

        fnames = get_feature_names(clf, vec)
        assert isinstance(fnames, FeatureNames)
        assert repr(fnames) == '<FeatureNames: 2 features with bias>'
        assert _names(clf, vec) == {'hello', 'world', '<BIAS>'}
        assert _names(clf, vec, 'B') == {'hello', 'world', 'B'}
        assert _names(clf) == {'x0', 'x1', '<BIAS>'}
        assert _names(clf, feature_names=['a', 'b']) == {'a', 'b', '<BIAS>'}
        assert _names(clf, feature_names=['a', 'b'],
                      bias_name='bias') == {'a', 'b', 'bias'}
        assert _names(clf, feature_names=np.array(['a', 'b'])) == \
            {'a', 'b', '<BIAS>'}
        assert _names(clf, feature_names=FeatureNames(['a', 'b'])) == \
            {'a', 'b', '<BIAS>'}
        assert _names(clf, feature_names=FeatureNames(
            n_features=2, unkn_template='F%d')) == {'F0', 'F1', '<BIAS>'}

        with pytest.raises(ValueError):
            get_feature_names(clf, feature_names=['a'])

        with pytest.raises(ValueError):
            get_feature_names(clf, feature_names=['a', 'b', 'c'])

        with pytest.raises(ValueError):
            get_feature_names(clf, feature_names=FeatureNames(['a', 'b', 'c']))

        clf2 = LogisticRegression(fit_intercept=False)
        clf2.fit(X, y)
        assert _names(clf2, vec) == {'hello', 'world'}
        assert _names(clf2, feature_names=['hello', 'world']) == \
            {'hello', 'world'}
Example #11
def explain_rf_feature_importance(
        clf,
        vec=None,
        top=_TOP,
        target_names=None,  # ignored
        targets=None,  # ignored
        feature_names=None,
        feature_re=None):
    """
    Return an explanation of a tree-based ensemble classifier in the
    following format::

        Explanation(
            estimator="<classifier repr>",
            method="<interpretation method>",
            description="<human readable description>",
            feature_importances=[
                FeatureWeight(feature_name, importance, std_deviation),
                ...
            ]
        )
    """
    feature_names = get_feature_names(clf, vec, feature_names=feature_names)
    coef = clf.feature_importances_
    trees = np.array(clf.estimators_).ravel()
    coef_std = np.std([tree.feature_importances_ for tree in trees], axis=0)

    if feature_re is not None:
        feature_names, flt_indices = feature_names.filtered_by_re(feature_re)
        coef = coef[flt_indices]
        coef_std = coef_std[flt_indices]

    indices = argsort_k_largest(coef, top)
    names, values, std = (feature_names[indices], coef[indices],
                          coef_std[indices])
    return Explanation(
        feature_importances=[
            FeatureWeight(*x) for x in zip(names, values, std)
        ],
        description=DESCRIPTION_RANDOM_FOREST,
        estimator=repr(clf),
        method='feature importances',
    )
Example #12
def explain_predictions(self, docs, top=30):
    # method of a wrapper class holding a fitted XGBClassifier (self.clf)
    # and its vectorizer (self.vec)
    if not isinstance(self.clf, XGBClassifier):
        raise NotImplementedError
    booster = self.clf.booster()  # pre-0.7 xgboost API; newer releases use get_booster()
    xgb_feature_names = {f: i for i, f in enumerate(booster.feature_names)}
    feature_names = get_feature_names(self.clf,
                                      self.vec,
                                      num_features=len(xgb_feature_names))
    feature_names.bias_name = '<BIAS>'
    X = self.vec.transform(docs)
    X = X.tocsc()
    dmatrix = DMatrix(X, missing=self.clf.missing)
    # leaf index reached by each document in every tree
    leaf_ids = booster.predict(dmatrix, pred_leaf=True)
    tree_dumps = booster.get_dump(with_stats=True)
    docs_weights = []
    for i, _leaf_ids in enumerate(leaf_ids):
        all_weights = _target_feature_weights(
            _leaf_ids,
            tree_dumps,
            feature_names=feature_names,
            xgb_feature_names=xgb_feature_names)[1]
        # keep weights only for features present in the document, plus the bias
        weights = np.zeros_like(all_weights)
        idx = X[i].nonzero()[1]
        bias_idx = feature_names.bias_idx
        weights[idx] = all_weights[idx]
        weights[bias_idx] = all_weights[bias_idx]
        docs_weights.append(weights)
    weights = np.mean(docs_weights, axis=0)
    feature_weights = get_top_features(
        feature_names=np.array([_prettify_feature(f) for f in feature_names]),
        coef=weights,
        top=top)
    return Explanation(
        estimator=type(self.clf).__name__,
        targets=[TargetExplanation('y', feature_weights=feature_weights)],
    )
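A sketch of the pred_leaf mechanism the method relies on (plain xgboost API, synthetic data):

import xgboost
from sklearn.datasets import make_classification

X, y = make_classification(random_state=0)
dtrain = xgboost.DMatrix(X, label=y)
booster = xgboost.train({'objective': 'binary:logistic'}, dtrain,
                        num_boost_round=5)
leaf_ids = booster.predict(xgboost.DMatrix(X), pred_leaf=True)
print(leaf_ids.shape)  # (n_samples, n_trees): the leaf each row falls into per tree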
Example #13
def _names(*args, **kwargs):
    return set(get_feature_names(*args, **kwargs))
Example #14
def test_get_feature_names_1dim_coef():
    clf = SGDRegressor(fit_intercept=False, **SGD_KWARGS)
    X, y = make_regression(n_targets=1, n_features=3)
    clf.fit(X, y)
    assert set(get_feature_names(clf)) == {'x0', 'x1', 'x2'}
Example #15
def explain_linear_regressor_weights(
    reg,
    vec=None,
    top=_TOP,
    target_names=None,
    targets=None,
    feature_names=None,
    coef_scale=None,
    feature_re=None,
    feature_filter=None,
):
    """
    Return an explanation of a linear regressor's weights.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``target_names``, ``targets``, ``feature_names``,
    ``feature_re`` and ``feature_filter`` parameters.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the regressor ``reg``; you can
    pass it instead of ``feature_names``.

    ``coef_scale`` is a 1D np.ndarray with a scaling coefficient
    for each feature; coef[i] = coef[i] * coef_scale[i] if
    coef_scale[i] is not nan. Use it if you want to scale coefficients
    before displaying them, to take input feature sign or scale into account.
    """
    feature_names, coef_scale = handle_hashing_vec(vec, feature_names,
                                                   coef_scale)
    feature_names = get_feature_names(reg, vec, feature_names=feature_names)
    feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re)
    _extra_caveats = "\n" + HASHING_CAVEATS if is_invhashing(vec) else ''

    def _features(target_id):
        coef = get_coef(reg, target_id, scale=coef_scale)
        if flt_indices is not None:
            coef = coef[flt_indices]
        return get_top_features(feature_names, coef, top)

    display_names = get_target_display_names(get_default_target_names(reg),
                                             target_names, targets)
    if is_multitarget_regressor(reg):
        return Explanation(
            targets=[
                TargetExplanation(target=target_name,
                                  feature_weights=_features(target_id))
                for target_id, target_name in display_names
            ],
            description=DESCRIPTION_REGRESSION_MULTITARGET + _extra_caveats,
            estimator=repr(reg),
            method='linear model',
            is_regression=True,
        )
    else:
        return Explanation(
            targets=[
                TargetExplanation(
                    target=display_names[0][1],
                    feature_weights=_features(0),
                )
            ],
            description=DESCRIPTION_REGRESSION + _extra_caveats,
            estimator=repr(reg),
            method='linear model',
            is_regression=True,
        )
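A sketch of the feature_re filter on a regressor with default x0..xN feature names (synthetic data):

import eli5
from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge

X, y = make_regression(n_features=5, random_state=0)
reg = Ridge().fit(X, y)
expl = eli5.explain_weights(reg, feature_re='^x[02]')  # keeps only x0 and x2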
Example #16
def explain_linear_classifier_weights(
    clf,
    vec=None,
    top=_TOP,
    target_names=None,
    targets=None,
    feature_names=None,
    coef_scale=None,
    feature_re=None,
    feature_filter=None,
):
    """
    Return an explanation of a linear classifier's weights.

    See :func:`eli5.explain_weights` for description of
    ``top``, ``target_names``, ``targets``, ``feature_names``,
    ``feature_re`` and ``feature_filter`` parameters.

    ``vec`` is a vectorizer instance used to transform
    raw features to the input of the classifier ``clf``
    (e.g. a fitted CountVectorizer instance); you can pass it
    instead of ``feature_names``.

    ``coef_scale`` is a 1D np.ndarray with a scaling coefficient
    for each feature; coef[i] = coef[i] * coef_scale[i] if
    coef_scale[i] is not nan. Use it if you want to scale coefficients
    before displaying them, to take input feature sign or scale into account.
    """
    feature_names, coef_scale = handle_hashing_vec(vec, feature_names,
                                                   coef_scale)
    feature_names = get_feature_names(clf, vec, feature_names=feature_names)
    feature_names, flt_indices = feature_names.handle_filter(
        feature_filter, feature_re)

    _extra_caveats = "\n" + HASHING_CAVEATS if is_invhashing(vec) else ''

    def _features(label_id):
        coef = get_coef(clf, label_id, scale=coef_scale)
        if flt_indices is not None:
            coef = coef[flt_indices]
        return get_top_features(feature_names, coef, top)

    display_names = get_target_display_names(clf.classes_, target_names,
                                             targets)
    if is_multiclass_classifier(clf):
        return Explanation(
            targets=[
                TargetExplanation(target=label,
                                  feature_weights=_features(label_id))
                for label_id, label in display_names
            ],
            description=DESCRIPTION_CLF_MULTICLASS + _extra_caveats,
            estimator=repr(clf),
            method='linear model',
        )
    else:
        # for binary classifiers scikit-learn stores a single coefficient
        # vector, which corresponds to clf.classes_[1].
        return Explanation(
            targets=[
                TargetExplanation(
                    target=display_names[1][1],
                    feature_weights=_features(0),
                )
            ],
            description=DESCRIPTION_CLF_BINARY + _extra_caveats,
            estimator=repr(clf),
            method='linear model',
        )
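A usage sketch for the binary case noted above: the displayed weights belong to clf.classes_[1], and target_names may rename the targets (toy data):

import eli5
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression

vec = CountVectorizer()
X = vec.fit_transform(['good movie', 'bad movie'])
clf = LogisticRegression().fit(X, [1, 0])
expl = eli5.explain_weights(clf, vec=vec,
                            target_names={0: 'negative', 1: 'positive'})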
Example #17
def explain_linear_regressor_weights(reg,
                                     vec=None,
                                     top=_TOP,
                                     target_names=None,
                                     targets=None,
                                     feature_names=None,
                                     coef_scale=None,
                                     feature_re=None):
    """
    Return an explanation of a linear regressor's weights in the following
    format::

        Explanation(
            estimator="<regressor repr>",
            method="<interpretation method>",
            description="<human readable description>",
            targets=[
                TargetExplanation(
                    target="<target name>",
                    feature_weights=FeatureWeights(
                        # positive weights
                        pos=[
                            (feature_name, coefficient),
                            ...
                        ],

                        # negative weights
                        neg=[
                            (feature_name, coefficient),
                            ...
                        ],

                        # A number of features not shown
                        pos_remaining = <int>,
                        neg_remaining = <int>,

                        # Sum of feature weights not shown
                        # pos_remaining_sum = <float>,
                        # neg_remaining_sum = <float>,
                    ),
                ),
                ...
            ]
        )

    To print it use utilities from eli5.formatters.
    """
    feature_names, coef_scale = handle_hashing_vec(vec, feature_names,
                                                   coef_scale)
    feature_names = get_feature_names(reg, vec, feature_names=feature_names)
    if feature_re is not None:
        feature_names, flt_indices = feature_names.filtered_by_re(feature_re)
    _extra_caveats = "\n" + HASHING_CAVEATS if is_invhashing(vec) else ''

    def _features(target_id):
        coef = get_coef(reg, target_id, scale=coef_scale)
        if feature_re is not None:
            coef = coef[flt_indices]
        return get_top_features(feature_names, coef, top)

    display_names = get_display_names(get_default_target_names(reg),
                                      target_names, targets)
    if is_multitarget_regressor(reg):
        return Explanation(
            targets=[
                TargetExplanation(target=target_name,
                                  feature_weights=_features(target_id))
                for target_id, target_name in display_names
            ],
            description=DESCRIPTION_REGRESSION_MULTITARGET + _extra_caveats,
            estimator=repr(reg),
            method='linear model',
            is_regression=True,
        )
    else:
        return Explanation(
            targets=[
                TargetExplanation(
                    target=display_names[0][1],
                    feature_weights=_features(0),
                )
            ],
            description=DESCRIPTION_REGRESSION + _extra_caveats,
            estimator=repr(reg),
            method='linear model',
            is_regression=True,
        )
Example #18
def explain_linear_classifier_weights(clf,
                                      vec=None,
                                      top=_TOP,
                                      target_names=None,
                                      targets=None,
                                      feature_names=None,
                                      coef_scale=None,
                                      feature_re=None):
    """
    Return an explanation of a linear classifier's weights in the following
    format::

        Explanation(
            estimator="<classifier repr>",
            method="<interpretation method>",
            description="<human readable description>",
            targets=[
                TargetExplanation(
                    target="<class name>",
                    feature_weights=FeatureWeights(
                        # positive weights
                        pos=[
                            (feature_name, coefficient),
                            ...
                        ],

                        # negative weights
                        neg=[
                            (feature_name, coefficient),
                            ...
                        ],

                        # A number of features not shown
                        pos_remaining = <int>,
                        neg_remaining = <int>,

                        # Sum of feature weights not shown
                        # pos_remaining_sum = <float>,
                        # neg_remaining_sum = <float>,
                    ),
                ),
                ...
            ]
        )

    To print it use utilities from eli5.formatters.
    """
    feature_names, coef_scale = handle_hashing_vec(vec, feature_names,
                                                   coef_scale)
    feature_names = get_feature_names(clf, vec, feature_names=feature_names)
    if feature_re is not None:
        feature_names, flt_indices = feature_names.filtered_by_re(feature_re)

    _extra_caveats = "\n" + HASHING_CAVEATS if is_invhashing(vec) else ''

    def _features(label_id):
        coef = get_coef(clf, label_id, scale=coef_scale)
        if feature_re is not None:
            coef = coef[flt_indices]
        return get_top_features(feature_names, coef, top)

    display_names = get_display_names(clf.classes_, target_names, targets)
    if is_multiclass_classifier(clf):
        return Explanation(
            targets=[
                TargetExplanation(target=label,
                                  feature_weights=_features(label_id))
                for label_id, label in display_names
            ],
            description=DESCRIPTION_CLF_MULTICLASS + _extra_caveats,
            estimator=repr(clf),
            method='linear model',
        )
    else:
        # for binary classifiers scikit-learn stores a single coefficient
        # vector, which corresponds to clf.classes_[1].
        return Explanation(
            targets=[
                TargetExplanation(
                    target=display_names[1][1],
                    feature_weights=_features(0),
                )
            ],
            description=DESCRIPTION_CLF_BINARY + _extra_caveats,
            estimator=repr(clf),
            method='linear model',
        )