Пример #1
0
def test_stacking_classifier_iris(cv, final_estimator, passthrough):
    # prescale the data to avoid convergence warning without using a pipeline
    # for later assert
    X_train, X_test, y_train, y_test = train_test_split(scale(X_iris),
                                                        y_iris,
                                                        stratify=y_iris,
                                                        random_state=42)
    estimators = [('lr', LogisticRegression()), ('svc', LinearSVC())]
    clf = StackingClassifier(estimators=estimators,
                             final_estimator=final_estimator,
                             cv=cv,
                             passthrough=passthrough)
    clf.fit(X_train, y_train)
    clf.predict(X_test)
    clf.predict_proba(X_test)
    assert clf.score(X_test, y_test) > 0.8

    X_trans = clf.transform(X_test)
    expected_column_count = 10 if passthrough else 6
    assert X_trans.shape[1] == expected_column_count
    if passthrough:
        assert_allclose(X_test, X_trans[:, -4:])

    clf.set_params(lr='drop')
    clf.fit(X_train, y_train)
    clf.predict(X_test)
    clf.predict_proba(X_test)
    if final_estimator is None:
        # LogisticRegression has decision_function method
        clf.decision_function(X_test)

    X_trans = clf.transform(X_test)
    expected_column_count_drop = 7 if passthrough else 3
    assert X_trans.shape[1] == expected_column_count_drop
    if passthrough:
        assert_allclose(X_test, X_trans[:, -4:])
Пример #2
0
def test_stacking_classifier_drop_column_binary_classification():
    # check that a column is dropped in binary classification
    X, y = load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, _ = train_test_split(scale(X),
                                                   y,
                                                   stratify=y,
                                                   random_state=42)

    # both classifiers implement 'predict_proba' and will both drop one column
    estimators = [('lr', LogisticRegression()),
                  ('rf', RandomForestClassifier(random_state=42))]
    clf = StackingClassifier(estimators=estimators, cv=3)

    clf.fit(X_train, y_train)
    X_trans = clf.transform(X_test)
    assert X_trans.shape[1] == 2

    # LinearSVC does not implement 'predict_proba' and will not drop one column
    estimators = [('lr', LogisticRegression()), ('svc', LinearSVC())]
    clf.set_params(estimators=estimators)

    clf.fit(X_train, y_train)
    X_trans = clf.transform(X_test)
    assert X_trans.shape[1] == 2
Пример #3
0
def test_stacking_classifier_sparse_passthrough(fmt):
    # Check passthrough behavior on a sparse X matrix
    X_train, X_test, y_train, _ = train_test_split(
        sparse.coo_matrix(scale(X_iris)).asformat(fmt),
        y_iris, random_state=42
    )
    estimators = [('lr', LogisticRegression()), ('svc', LinearSVC())]
    rf = RandomForestClassifier(n_estimators=10, random_state=42)
    clf = StackingClassifier(
        estimators=estimators, final_estimator=rf, cv=5, passthrough=True
    )
    clf.fit(X_train, y_train)
    X_trans = clf.transform(X_test)
    assert_allclose_dense_sparse(X_test, X_trans[:, -4:])
    assert sparse.issparse(X_trans)
    assert X_test.format == X_trans.format