Пример #1
0
def test_estimator_does_not_support_feature_names():
    """SelectFromModel works with estimators that do not support feature_names_in_.

    Non-regression test for #21949.
    """
    pytest.importorskip("pandas")
    X, y = datasets.load_iris(as_frame=True, return_X_y=True)
    all_feature_names = set(X.columns)

    def importance_getter(estimator):
        return np.arange(X.shape[1])

    selector = SelectFromModel(
        MinimalClassifier(), importance_getter=importance_getter
    ).fit(X, y)

    # selector learns the feature names itself
    assert_array_equal(selector.feature_names_in_, X.columns)

    feature_names_out = set(selector.get_feature_names_out())
    assert feature_names_out < all_feature_names

    with pytest.warns(None) as records:
        selector.transform(X.iloc[1:3])
    assert not [w.message for w in records]
Пример #2
0
def test_prefit_get_feature_names_out():
    """Check the interaction between prefit and the feature names."""
    clf = RandomForestClassifier(n_estimators=2, random_state=0)
    clf.fit(data, y)
    model = SelectFromModel(clf, prefit=True, max_features=1)

    # FIXME: the error message should be improved. Raising a `NotFittedError`
    # would be better since it would force to validate all class attribute and
    # create all the necessary fitted attribute
    err_msg = "Unable to generate feature names without n_features_in_"
    with pytest.raises(ValueError, match=err_msg):
        model.get_feature_names_out()

    model.fit(data, y)
    feature_names = model.get_feature_names_out()
    assert feature_names == ["x3"]