Python StackingCVClassifier примеры, mlxtend.classifier.StackingCVClassifier Python примеры использования

Пример #1

0

Показать файл

def test_use_clones():
    """Check how ``use_clones`` affects the estimators handed to the stack.

    After fitting with ``use_clones=True`` the original random forest is
    expected to raise ``NotFittedError`` on ``predict``. After fitting with
    ``use_clones=False`` the same ``predict`` call is made without expecting
    an error.
    """
    np.random.seed(123)
    features, labels = iris_data()

    meta_lr = LogisticRegression(solver='liblinear', multi_class='ovr')
    forest = RandomForestClassifier(n_estimators=10)
    bayes = GaussianNB()

    # Fit on clones: the estimators passed in must stay unfitted.
    cloning_stack = StackingCVClassifier(classifiers=[forest, bayes],
                                         use_clones=True,
                                         meta_classifier=meta_lr,
                                         shuffle=False)
    cloning_stack.fit(features, labels)

    not_fitted_msg = ("This RandomForestClassifier instance is not fitted yet."
                      " Call 'fit' with appropriate arguments"
                      " before using this estimator.")
    assert_raises(exceptions.NotFittedError,
                  not_fitted_msg,
                  forest.predict,
                  features)

    # Fit without cloning: the very same forest object is used afterwards.
    sharing_stack = StackingCVClassifier(classifiers=[forest, bayes],
                                         use_probas=True,
                                         use_clones=False,
                                         meta_classifier=meta_lr,
                                         shuffle=False)
    sharing_stack.fit(features, labels)

    forest.predict(features)

Пример #2

0

Показать файл

Файл: StackingDemo.py Проект: JYLFamily/Python_Study_Note

    def function_set(self):
        """Create the hyper-parameter grid, the stacked model and its search.

        Stores the grid in ``self.__params``, the estimators in
        ``self.__lr``, ``self.__gb``, ``self.__rf`` and ``self.__sclf``, and
        the ``GridSearchCV`` object in ``self.__grid``. Nothing is fitted
        here.
        """
        # Hyper-parameter grid.
        # NOTE: the key names must be spelled exactly like this.
        search_space = {}
        search_space["logisticregression__C"] = list(
            np.linspace(start=0.1, stop=10, num=5))
        search_space["gradientboostingclassifier__learning_rate"] = list(
            np.linspace(start=0.1, stop=1, num=10))
        search_space["randomforestclassifier__n_estimators"] = list(
            range(5, 16))
        search_space["meta-logisticregression__C"] = list(
            np.linspace(start=0.1, stop=10, num=5))
        self.__params = search_space

        # Base learners; the logistic regression is also the meta-learner.
        self.__lr = LogisticRegression()
        self.__gb = GradientBoostingClassifier()
        self.__rf = RandomForestClassifier()

        base_learners = [self.__lr, self.__gb, self.__rf]
        self.__sclf = StackingCVClassifier(classifiers=base_learners,
                                           meta_classifier=self.__lr,
                                           use_probas=True,
                                           cv=5,
                                           use_features_in_secondary=True,
                                           verbose=1)

        # Grid search over the stacked model.
        self.__grid = GridSearchCV(estimator=self.__sclf,
                                   param_grid=self.__params,
                                   cv=5,
                                   refit=True)

Пример #3

0

Показать файл

Файл: test_stacking_cv_classifier.py Проект: rasbt/mlxtend

def test_get_params():
    """Compare the top-level parameter names of a stack with a fixed list."""
    knn = KNeighborsClassifier(n_neighbors=1)
    forest = RandomForestClassifier(random_state=1)
    bayes = GaussianNB()
    meta_lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    stack = StackingCVClassifier(classifiers=[knn, forest, bayes],
                                 meta_classifier=meta_lr)

    # Keep only the part before the first '__' so nested estimator
    # parameters collapse onto the name of the estimator that owns them.
    top_level_names = set()
    for param_name in stack.get_params():
        top_level_names.add(param_name.split('__')[0])
    got = sorted(top_level_names)

    expect = [
        'classifiers',
        'cv',
        'drop_last_proba',
        'gaussiannb',
        'kneighborsclassifier',
        'meta_classifier',
        'n_jobs',
        'pre_dispatch',
        'random_state',
        'randomforestclassifier',
        'shuffle',
        'store_train_meta_features',
        'stratify',
        'use_clones',
        'use_features_in_secondary',
        'use_probas',
        'verbose',
    ]
    assert got == expect, got

Пример #4

0

Показать файл

Файл: test_stacking_cv_classifier.py Проект: rasbt/mlxtend

def test_sparse_inputs_with_features_in_secondary():
    """Fit on dense and on CSR input with ``use_features_in_secondary=True``.

    Both fits are scored on the dense training split. The expected rounded
    score depends on the installed scikit-learn version.
    """
    forest = RandomForestClassifier(n_estimators=10, random_state=42)
    meta_lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    stack = StackingCVClassifier(classifiers=[forest, forest],
                                 meta_classifier=meta_lr,
                                 random_state=42,
                                 use_features_in_secondary=True)
    X_train, _, y_train, _ = train_test_split(X_breast, y_breast,
                                              test_size=0.3)

    # The expectation is the same for both the dense and the sparse fit.
    older_sklearn = Version(sklearn_version) < Version("0.21")
    expected_value = 1.0 if older_sklearn else 0.99

    # dense
    stack.fit(X_train, y_train)
    dense_score = round(stack.score(X_train, y_train), 2)
    assert dense_score == expected_value, dense_score

    # sparse
    stack.fit(sparse.csr_matrix(X_train), y_train)
    sparse_score = round(stack.score(X_train, y_train), 2)
    assert sparse_score == expected_value, sparse_score

Пример #5

0

Показать файл

def test_train_meta_features_():
    """Check the shape of ``train_meta_features_`` stored after fitting."""
    base_learners = [KNeighborsClassifier(), GaussianNB()]
    meta_lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    stack = StackingCVClassifier(classifiers=base_learners,
                                 meta_classifier=meta_lr,
                                 store_train_meta_features=True)

    X_train, _, y_train, _ = train_test_split(X_iris, y_iris, test_size=0.3)
    stack.fit(X_train, y_train)

    # One row per training sample, one column per base classifier.
    expected_shape = (X_train.shape[0], 2)
    assert stack.train_meta_features_.shape == expected_shape

Пример #6

0

Показать файл

Файл: test_stacking_cv_classifier.py Проект: venkatesh-1729/mlxtend

def test_verbose():
    """Smoke test: fit a stack on the iris data with ``verbose=3``."""
    np.random.seed(123)
    stack = StackingCVClassifier(
        classifiers=[RandomForestClassifier(), GaussianNB()],
        use_probas=True,
        meta_classifier=LogisticRegression(),
        shuffle=False,
        verbose=3,
    )
    stack.fit(iris.data, iris.target)

Пример #7

0

Показать файл

Файл: test_stacking_cv_classifier.py Проект: rasbt/mlxtend

def test_verbose():
    """Smoke test: fitting with ``verbose=3`` must complete without error."""
    np.random.seed(123)
    base_learners = [RandomForestClassifier(n_estimators=10), GaussianNB()]
    meta_lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    stack = StackingCVClassifier(classifiers=base_learners,
                                 use_probas=True,
                                 meta_classifier=meta_lr,
                                 shuffle=False,
                                 verbose=3)
    stack.fit(X_iris, y_iris)

Пример #8

0

Показать файл

Файл: test_stacking_cv_classifier.py Проект: rasbt/mlxtend

def test_no_weight_support():
    """Fitting this stack with ``sample_weight`` must raise ``TypeError``."""
    # One random weight per sample.
    weights = np.array([random.random() for _ in range(len(y_iris))])

    meta_lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    base_learners = [
        RandomForestClassifier(n_estimators=10),
        GaussianNB(),
        KNeighborsClassifier(),
    ]
    stack = StackingCVClassifier(classifiers=base_learners,
                                 meta_classifier=meta_lr,
                                 shuffle=False)

    with pytest.raises(TypeError):
        stack.fit(X_iris, y_iris, sample_weight=weights)

Пример #9

0

Показать файл

Файл: test_stacking_cv_classifier.py Проект: venkatesh-1729/mlxtend

def test_train_meta_features_():
    """Fit with ``store_train_meta_features=True`` and check the stored shape."""
    stack = StackingCVClassifier(
        classifiers=[KNeighborsClassifier(), GaussianNB()],
        meta_classifier=LogisticRegression(),
        store_train_meta_features=True,
    )
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.3)
    stack.fit(X_train, y_train)

    stored = stack.train_meta_features_
    n_train_rows = X_train.shape[0]
    assert stored.shape == (n_train_rows, 2)

Пример #10

0

Показать файл

def test_verbose():
    """Smoke test: a stack with ``verbose=3`` fits on the iris data."""
    np.random.seed(123)

    forest = RandomForestClassifier()
    bayes = GaussianNB()
    meta_lr = LogisticRegression()

    stack_kwargs = {
        'classifiers': [forest, bayes],
        'use_probas': True,
        'meta_classifier': meta_lr,
        'shuffle': False,
        'verbose': 3,
    }
    StackingCVClassifier(**stack_kwargs).fit(iris.data, iris.target)

Пример #11

0

Показать файл

Файл: test_stacking_cv_classifier.py Проект: rasbt/mlxtend

def test_no_weight_support_meta():
    """Fitting with ``sample_weight`` and a KNN meta-classifier must raise.

    The expected exception type is ``TypeError``.
    """
    n_samples = len(y_iris)
    weights = np.array([random.random() for _ in range(n_samples)])

    knn_meta = KNeighborsClassifier()
    forest = RandomForestClassifier(n_estimators=10)
    bayes = GaussianNB()
    stack = StackingCVClassifier(classifiers=[forest, bayes],
                                 meta_classifier=knn_meta,
                                 shuffle=False)

    with pytest.raises(TypeError):
        stack.fit(X_iris, y_iris, sample_weight=weights)

Пример #12

0

Показать файл

Файл: test_stacking_cv_classifier.py Проект: vladimiralencar/mlxtend

def test_train_meta_features_():
    """The stored training meta-features have one column per base learner."""
    knn, bayes = KNeighborsClassifier(), GaussianNB()
    meta_lr = LogisticRegression()
    stack = StackingCVClassifier(classifiers=[knn, bayes],
                                 meta_classifier=meta_lr,
                                 store_train_meta_features=True)

    split = train_test_split(X, y, test_size=0.3)
    X_train, y_train = split[0], split[2]
    stack.fit(X_train, y_train)

    assert stack.train_meta_features_.shape == (X_train.shape[0], 2)

Пример #13

0

Показать файл

Файл: data_modeling.py Проект: Data-Science-Project-G13/monitoring-athletes-performance

 def _build_model(self, X_train, y_train):
     """Fit a stacked classifier on the given training data and return it.

     Five base learners (KNN, random forest, logistic regression, SVC and
     XGBoost) feed a Gaussian naive Bayes meta-classifier.
     """
     knn_clf = KNeighborsClassifier(n_neighbors=1)
     forest_clf = RandomForestClassifier(max_depth=3,
                                         max_features=6,
                                         n_estimators=50,
                                         random_state=0)
     svc_clf = svm.SVC(C=1.0, kernel='poly', degree=5)
     xgb_clf = XGBClassifier(alpha=15,
                             colsample_bytree=0.1,
                             learning_rate=1,
                             max_depth=5,
                             reg_lambda=10.0)
     logreg_clf = LogisticRegression(C=10.0,
                                     dual=False,
                                     max_iter=100,
                                     solver='lbfgs')
     meta_clf = GaussianNB()

     base_learners = [knn_clf, forest_clf, logreg_clf, svc_clf, xgb_clf]
     stacked = StackingCVClassifier(classifiers=base_learners,
                                    meta_classifier=meta_clf,
                                    random_state=42)
     stacked.fit(X_train, y_train)
     return stacked

Пример #14

0

Показать файл

Файл: test_stacking_cv_classifier.py Проект: rasbt/mlxtend

def test_predict_meta_features():
    """Check that ``predict`` returns one value per test sample."""
    X_train, X_test, y_train, _ = train_test_split(X_iris, y_iris,
                                                   test_size=0.3)

    # Default configuration (class labels rather than probabilities).
    stack = StackingCVClassifier(
        classifiers=[KNeighborsClassifier(), GaussianNB()],
        meta_classifier=LogisticRegression(multi_class='ovr',
                                           solver='liblinear'),
        store_train_meta_features=True,
    )
    stack.fit(X_train, y_train)

    predictions = stack.predict(X_test)
    n_test_rows = X_test.shape[0]
    assert predictions.shape == (n_test_rows,)

Пример #15

0

Показать файл

Файл: stacking_baseline.py Проект: dujijundavid/Home_Default_Risk

 def model_fit(self):
     """Create the base learners and the stack, then fit on the training set.

     The stack is fitted on the ``.values`` of the stored training features
     and training labels.
     """
     # Base learners.
     self.__ef = ExtraTreesClassifier(n_jobs=-1)
     self.__rf = RandomForestClassifier(n_jobs=-1)
     self.__lr = LogisticRegression()
     self.__gb = GradientBoostingClassifier()
     self.__xgb = XGBClassifier(n_jobs=-1, missing=-999.0)

     # The logistic regression is used only as the meta-classifier.
     base_learners = [self.__ef, self.__rf, self.__gb, self.__xgb]
     self.__sclf = StackingCVClassifier(classifiers=base_learners,
                                        meta_classifier=self.__lr,
                                        use_probas=True,
                                        cv=3)

     train_features = self.__application_train_feature.values
     train_labels = self.__application_train_label.values
     self.__sclf.fit(train_features, train_labels)

Пример #16

0

Показать файл

Файл: test_stacking_cv_classifier.py Проект: vladimiralencar/mlxtend

def test_pandas():
    """Fit on a ``DataFrame``; if ``KeyError`` is raised, check its message.

    No assertion is made when the fit succeeds.
    """
    frame = pd.DataFrame(X)
    stack = StackingCVClassifier(
        classifiers=[RandomForestClassifier(), GaussianNB()],
        use_probas=True,
        meta_classifier=LogisticRegression(),
        shuffle=False,
        verbose=0,
    )
    expected_fragment = 'are NumPy arrays. If X and y are pandas DataFrames'
    try:
        stack.fit(frame, iris.target)
    except KeyError as err:
        assert expected_fragment in str(err)

Пример #17

0

Показать файл

Файл: test_stacking_cv_classifier.py Проект: zmandyhe/mlxtend

def test_pandas():
    """Fit on a pandas ``DataFrame`` built from the iris features.

    If the fit raises ``KeyError`` its message is checked. A successful fit
    is accepted without further assertions.
    """
    iris_frame = pd.DataFrame(X_iris)

    meta_lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    base_learners = [RandomForestClassifier(n_estimators=10), GaussianNB()]
    stack = StackingCVClassifier(classifiers=base_learners,
                                 use_probas=True,
                                 meta_classifier=meta_lr,
                                 shuffle=False,
                                 verbose=0)
    try:
        stack.fit(iris_frame, y_iris)
    except KeyError as err:
        message = str(err)
        assert 'are NumPy arrays. If X and y are pandas DataFrames' in message

Пример #18

0

Показать файл

Файл: test_stacking_cv_classifier.py Проект: venkatesh-1729/mlxtend

def test_pandas():
    """A ``KeyError`` from fitting on a ``DataFrame`` must have this message.

    A fit that succeeds is not checked any further.
    """
    X_frame = pd.DataFrame(X)

    forest = RandomForestClassifier()
    bayes = GaussianNB()
    meta_lr = LogisticRegression()
    stack = StackingCVClassifier(classifiers=[forest, bayes],
                                 use_probas=True,
                                 meta_classifier=meta_lr,
                                 shuffle=False,
                                 verbose=0)

    try:
        stack.fit(X_frame, iris.target)
    except KeyError as key_error:
        hint = 'are NumPy arrays. If X and y are pandas DataFrames'
        assert hint in str(key_error)

Пример #19

0

Показать файл

def test_meta_feat_reordering():
    """Check the ROC AUC of one stored meta-feature column after shuffling.

    The stack is fitted with ``shuffle=True``. The second column of
    ``train_meta_features_`` is then scored against ``y_train``.
    """
    # NOTE(review): neither the split nor the stack is seeded here, so the
    # 0.88 expectation presumably relies on global random state.
    stack = StackingCVClassifier(
        classifiers=[KNeighborsClassifier(), GaussianNB()],
        meta_classifier=LogisticRegression(),
        shuffle=True,
        store_train_meta_features=True,
    )
    X_train, _, y_train, _ = train_test_split(X_breast, y_breast,
                                              test_size=0.3)
    stack.fit(X_train, y_train)

    second_column = stack.train_meta_features_[:, 1]
    auc = roc_auc_score(y_train, second_column)
    assert round(auc, 2) == 0.88

Пример #20

0

Показать файл

Файл: test_stacking_cv_classifier.py Проект: vladimiralencar/mlxtend

def test_list_of_lists():
    """Fit on a plain list of rows; check the message of any ``TypeError``.

    No assertion is made when the fit succeeds.
    """
    rows = list(X)
    stack = StackingCVClassifier(
        classifiers=[RandomForestClassifier(), GaussianNB()],
        use_probas=True,
        meta_classifier=LogisticRegression(),
        shuffle=False,
        verbose=0,
    )

    expected_fragment = 'are NumPy arrays. If X and y are lists'
    try:
        stack.fit(rows, iris.target)
    except TypeError as err:
        assert expected_fragment in str(err)

Пример #21

0

Показать файл

Файл: test_stacking_cv_classifier.py Проект: venkatesh-1729/mlxtend

def test_list_of_lists():
    """A ``TypeError`` from fitting on a list of rows must have this message.

    A fit that succeeds is not checked any further.
    """
    X_as_list = list(X)

    forest = RandomForestClassifier()
    bayes = GaussianNB()
    meta_lr = LogisticRegression()
    stack = StackingCVClassifier(classifiers=[forest, bayes],
                                 use_probas=True,
                                 meta_classifier=meta_lr,
                                 shuffle=False,
                                 verbose=0)

    try:
        stack.fit(X_as_list, iris.target)
    except TypeError as type_error:
        hint = 'are NumPy arrays. If X and y are lists'
        assert hint in str(type_error)

Пример #22

0

Показать файл

Файл: StackingDemo.py Проект: JYLFamily/Python_Study_Note

 def pick_the_best_function(self):
     """Fit the stack with fixed hyper-parameters and print the test ROC AUC.

     The AUC is computed from the second column of ``predict_proba`` on the
     stored test set.
     """
     # Base learners with fixed hyper-parameters.
     self.__lr = LogisticRegression(C=0.1)
     self.__gb = GradientBoostingClassifier(learning_rate=0.1)
     self.__rf = RandomForestClassifier(n_estimators=5)

     # The logistic regression is both a base learner and the meta-learner.
     base_learners = [self.__lr, self.__gb, self.__rf]
     self.__sclf = StackingCVClassifier(classifiers=base_learners,
                                        meta_classifier=self.__lr,
                                        use_probas=True,
                                        cv=5,
                                        use_features_in_secondary=True,
                                        verbose=1)
     self.__sclf.fit(self.__train, self.__train_label)

     second_column_proba = self.__sclf.predict_proba(self.__test)[:, 1]
     print(roc_auc_score(self.__test_label, second_column_proba))

Пример #23

0

Показать файл

Файл: test_stacking_cv_classifier.py Проект: zmandyhe/mlxtend

def test_list_of_lists():
    """Fit on the iris features given as a Python list of rows.

    If the fit raises ``TypeError`` its message is checked. A successful fit
    is accepted without further assertions.
    """
    iris_rows = list(X_iris)

    meta_lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    base_learners = [RandomForestClassifier(n_estimators=10), GaussianNB()]
    stack = StackingCVClassifier(classifiers=base_learners,
                                 use_probas=True,
                                 meta_classifier=meta_lr,
                                 shuffle=False,
                                 verbose=0)

    try:
        stack.fit(iris_rows, y_iris)
    except TypeError as err:
        message = str(err)
        assert 'are NumPy arrays. If X and y are lists' in message

Пример #24

0

Показать файл

def test_not_fitted():
    """Prediction methods of an unfitted stack must raise ``NotFittedError``.

    Checked in order: ``predict``, ``predict_proba`` and
    ``predict_meta_features``.
    """
    np.random.seed(123)
    stack = StackingCVClassifier(
        classifiers=[RandomForestClassifier(), GaussianNB()],
        use_probas=True,
        meta_classifier=LogisticRegression(),
        shuffle=False,
    )

    not_fitted_msg = ("This StackingCVClassifier instance is not fitted yet."
                      " Call 'fit' with appropriate arguments"
                      " before using this method.")

    # The methods are looked up one at a time, in the original order.
    for method_name in ('predict', 'predict_proba', 'predict_meta_features'):
        assert_raises(NotFittedError,
                      not_fitted_msg,
                      getattr(stack, method_name),
                      iris.data)

Пример #25

0

Показать файл

Файл: plain_classifiers.py Проект: fletch22/nba_win_predictor

def get_stacked_classifiers():
    """Return the plain classifiers plus a stack built on top of them.

    Returns
    -------
    tuple
        ``(classifiers, names)``: two parallel lists. They hold the entries
        from ``get_classifiers()`` followed by the ``StackingCVClassifier``
        and ``CLF_TYPES.StackingCVClassifier`` respectively.
    """
    dict_clfs = get_classifiers()

    # Settings of the logistic-regression meta-classifier.
    kwargs = {
        'C': 100.0,
        'dual': False,
        'fit_intercept': True,
        'multi_class': 'multinomial',
        'penalty': 'l2',
        'solver': 'saga'
    }
    lr = LogisticRegression(**kwargs)

    names = list(dict_clfs.keys())
    classifiers = [dict_clfs[clf_name] for clf_name in names]

    # Hand the stack its own copy of the list. Estimators keep a reference to
    # their constructor arguments, so passing `classifiers` itself would make
    # the append below add the stack to its own base learners.
    clf_stacked = StackingCVClassifier(classifiers=list(classifiers),
                                       use_probas=True,
                                       use_features_in_secondary=True,
                                       meta_classifier=lr)
    names.append(CLF_TYPES.StackingCVClassifier)
    classifiers.append(clf_stacked)

    return classifiers, names

Пример #26

0

Показать файл

def stacking_clf_and_gridsearch_and_same_algo_multi_times(X, y):
    """Grid-search a stack that contains the same KNN estimator twice.

    Prints mean score, score standard deviation and parameters of every
    grid point, then the best parameters and the best score.
    """
    knn = KNeighborsClassifier(n_neighbors=1)
    forest = RandomForestClassifier(random_state=RANDOM_SEED)
    meta_lr = LogisticRegression()

    # The same classifier appears twice among the base learners.
    stack = StackingCVClassifier(classifiers=[knn, knn, forest],
                                 meta_classifier=meta_lr,
                                 random_state=RANDOM_SEED)

    # Repeated estimators are addressed with a numeric suffix ("-1", "-2").
    param_grid = {
        'kneighborsclassifier-1__n_neighbors': [1, 5],
        'kneighborsclassifier-2__n_neighbors': [1, 5],
        'randomforestclassifier__n_estimators': [10, 50],
        'meta_classifier__C': [1, 10],
    }
    grid = model_selection.GridSearchCV(estimator=stack,
                                        param_grid=param_grid,
                                        cv=5,
                                        refit=True)
    grid.fit(X, y)

    results = grid.cv_results_
    rows = zip(results['mean_test_score'],
               results['std_test_score'],
               results['params'])
    for mean_score, std_score, point in rows:
        print('%0.3f +/- %0.2f %r' % (mean_score, std_score, point))

    print('Best paras: %s' % grid.best_params_)
    print('Best scores: %0.3f' % grid.best_score_)

Пример #27

0

Показать файл

def simple_stacking_cv_classification_and_gridsearch(X, y):
    """Grid-search a three-learner stack and print the results.

    For every grid point the mean test score, half of the test-score
    standard deviation and the parameters are printed. The best parameters
    and the best score follow.
    """
    knn = KNeighborsClassifier(n_neighbors=1)
    forest = RandomForestClassifier(random_state=RANDOM_SEED)
    bayes = GaussianNB()
    meta_lr = LogisticRegression()

    stack = StackingCVClassifier(classifiers=[knn, forest, bayes],
                                 meta_classifier=meta_lr,
                                 random_state=RANDOM_SEED)
    param_grid = {
        'kneighborsclassifier__n_neighbors': [1, 5],
        'randomforestclassifier__n_estimators': [10, 50],
        'meta_classifier__C': [1, 10],
    }
    grid = model_selection.GridSearchCV(estimator=stack,
                                        param_grid=param_grid,
                                        cv=5,
                                        refit=True)
    grid.fit(X, y)

    results = grid.cv_results_
    rows = zip(results['mean_test_score'],
               results['std_test_score'],
               results['params'])
    for mean_score, std_score, point in rows:
        # The printed spread is half the standard deviation.
        print("%0.3f +/- %0.2f %r" % (mean_score, std_score / 2.0, point))

    print('Best parameters: %s' % grid.best_params_)
    print('Accuracy: %0.2f' % grid.best_score_)

Пример #28

0

Показать файл

Файл: CDH_C_ST_LG_PSO.py Проект: FutureLYU/iNuc-en-NDPE

def aimfuc(k):
    """Return the negated mean 6-fold CV accuracy of a stack per row of ``k``.

    For row ``i``, ``k[i, 0]`` is used as ``C`` of the logistic-regression
    meta-classifier. The base learners ``clf_SVM``, ``clf_RF``, ``clf_MLP``
    and ``clf_Ad`` and the data ``X_train`` / ``Y_train`` come from the
    enclosing module.
    """
    n_rows = k.shape[0]
    obj = np.ones(n_rows)

    for i in range(n_rows):
        meta_lr = LogisticRegression(C=k[i, 0],
                                     solver='liblinear',
                                     random_state=RANDOM_SEED,
                                     penalty='l2')
        stack = StackingCVClassifier(
            classifiers=[clf_SVM, clf_RF, clf_MLP, clf_Ad],
            meta_classifier=meta_lr,
            use_probas=True)

        fold_scores = cross_val_score(stack,
                                      X_train,
                                      Y_train,
                                      cv=6,
                                      scoring='accuracy',
                                      n_jobs=-1)
        # Stored with a flipped sign.
        obj[i] = -fold_scores.mean()

    return obj

Пример #29

0

Показать файл

Файл: test_stacking_cv_classifier.py Проект: vladimiralencar/mlxtend

def test_get_params():
    """The top-level parameter names of a stack must match a fixed list."""
    knn = KNeighborsClassifier(n_neighbors=1)
    forest = RandomForestClassifier(random_state=1)
    bayes = GaussianNB()
    stack = StackingCVClassifier(classifiers=[knn, forest, bayes],
                                 meta_classifier=LogisticRegression())

    # Reduce every parameter name to the part before its first '__'.
    prefixes = {name.split('__')[0] for name in stack.get_params()}
    got = sorted(prefixes)

    expect = [
        'classifiers',
        'cv',
        'gaussiannb',
        'kneighborsclassifier',
        'meta-logisticregression',
        'meta_classifier',
        'randomforestclassifier',
        'refit',
        'shuffle',
        'store_train_meta_features',
        'stratify',
        'use_features_in_secondary',
        'use_probas',
        'verbose',
    ]
    assert got == expect, got

Пример #30

0

Показать файл

Файл: stacking_cv.py Проект: hikekang/hkx_tf_practice

def fun2():
    """Print the 3-fold CV accuracy of three base classifiers and their stack.

    ``X`` and ``y`` are not defined inside this function; they are read from
    the enclosing module scope.
    """
    # Each name is imported once; imports the body never used were dropped.
    from sklearn import model_selection
    from sklearn.linear_model import LogisticRegression
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.naive_bayes import GaussianNB
    from sklearn.ensemble import RandomForestClassifier
    from mlxtend.classifier import StackingCVClassifier

    clf1 = KNeighborsClassifier(n_neighbors=1)
    clf2 = RandomForestClassifier(random_state=1)
    clf3 = GaussianNB()
    lr = LogisticRegression()

    sclf = StackingCVClassifier(classifiers=[clf1, clf2, clf3],
                                use_probas=True,
                                meta_classifier=lr,
                                random_state=42)

    print('3-fold cross validation:\n')

    for clf, label in zip([clf1, clf2, clf3, sclf],
                          ['KNN',
                           'Random Forest',
                           'Naive Bayes',
                           'StackingClassifier']):
        scores = model_selection.cross_val_score(clf, X, y,
                                                 cv=3, scoring='accuracy')
        print("Accuracy: %0.2f (+/- %0.2f) [%s]"
              % (scores.mean(), scores.std(), label))

Пример #31

0

Показать файл

Файл: test_stacking_cv_classifier.py Проект: rasbt/mlxtend

def test_works_with_df_if_fold_indexes_missing():
    """Regression test: fit on pandas data whose index labels are offset.

    Fitting must keep working when the positions produced by the
    cross-validation splitter (e.g. skf) cannot be used as index labels of
    the training data. Typical situations:

    + The labels of a fold are not consecutive ([341, 345, 543, ...] rather
      than [0, 1, ... n]).
    + The labels start at a number larger than the size of the input, which
      is the case exercised here.

    Training data sometimes has ids that carry other information, and
    selecting rows for the folds must not break on them. The fix in the
    code relies on `safe_indexing`.
    """
    np.random.seed(123)
    forest = RandomForestClassifier(n_estimators=10, random_state=42)
    meta_lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    stack = StackingCVClassifier(classifiers=[forest, forest],
                                 meta_classifier=meta_lr,
                                 random_state=42,
                                 use_features_in_secondary=True)

    # Shift every index label by 1000.
    shifted_X_index = np.arange(X_breast.shape[0]) + 1000
    shifted_y_index = np.arange(y_breast.shape[0]) + 1000
    X_modded = pd.DataFrame(X_breast, index=shifted_X_index)
    y_modded = pd.Series(y_breast, index=shifted_y_index)

    X_train, _, y_train, _ = train_test_split(X_modded,
                                              y_modded,
                                              test_size=0.3)

    # dense
    stack.fit(X_train, y_train)
    train_score = round(stack.score(X_train, y_train), 2)
    assert train_score == 0.99, train_score

Пример #32

0

Показать файл

Файл: test_stacking_cv_classifier.py Проект: rasbt/mlxtend

def test_meta_feat_reordering():
    """Check the ROC AUC of one stored meta-feature column after shuffling.

    The stack is fitted with ``shuffle=True`` and the second column of
    ``train_meta_features_`` is scored against ``y_train``. The expected
    value depends on the installed scikit-learn version.
    """
    stack = StackingCVClassifier(
        classifiers=[KNeighborsClassifier(), GaussianNB()],
        meta_classifier=LogisticRegression(multi_class='ovr',
                                           solver='liblinear'),
        shuffle=True,
        random_state=42,
        store_train_meta_features=True,
    )
    X_train, _, y_train, _ = train_test_split(X_breast, y_breast,
                                              random_state=0,
                                              test_size=0.3)
    stack.fit(X_train, y_train)

    older_sklearn = Version(sklearn_version) < Version("0.21")
    expected_value = 0.86 if older_sklearn else 0.87

    second_column = stack.train_meta_features_[:, 1]
    auc = round(roc_auc_score(y_train, second_column), 2)
    assert auc == expected_value, auc

Пример #33

0

Показать файл

Файл: test_stacking_cv_classifier.py Проект: NextNight/mlxtend

def test_get_params():
    """Check the set of top-level parameter names exposed by a stack."""
    base_learners = [
        KNeighborsClassifier(n_neighbors=1),
        RandomForestClassifier(random_state=1),
        GaussianNB(),
    ]
    stack = StackingCVClassifier(classifiers=base_learners,
                                 meta_classifier=LogisticRegression())

    # Only the prefix before the first '__' is compared.
    seen = set()
    for full_name in stack.get_params():
        prefix = full_name.split('__')[0]
        seen.add(prefix)
    got = sorted(seen)

    expect = [
        'classifiers', 'cv', 'gaussiannb', 'kneighborsclassifier',
        'meta-logisticregression', 'meta_classifier',
        'randomforestclassifier', 'refit', 'shuffle',
        'store_train_meta_features', 'stratify',
        'use_features_in_secondary', 'use_probas', 'verbose',
    ]
    assert got == expect, got

Пример #34

0

Показать файл

Файл: test_stacking_cv_classifier.py Проект: rasbt/mlxtend

def test_sparse_inputs():
    """Fit on dense and on CSR training data; both must score 0.99.

    The score is computed on the dense training split and rounded to two
    decimals.
    """
    np.random.seed(123)
    forest = RandomForestClassifier(n_estimators=10)
    meta_lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    stack = StackingCVClassifier(classifiers=[forest, forest],
                                 meta_classifier=meta_lr,
                                 random_state=42)
    X_train, _, y_train, _ = train_test_split(X_breast, y_breast,
                                              test_size=0.3)

    # dense
    stack.fit(X_train, y_train)
    dense_score = stack.score(X_train, y_train)
    assert round(dense_score, 2) == 0.99

    # sparse
    stack.fit(sparse.csr_matrix(X_train), y_train)
    sparse_score = stack.score(X_train, y_train)
    assert round(sparse_score, 2) == 0.99

Пример #35

0

Показать файл

Файл: test_stacking_cv_classifier.py Проект: rasbt/mlxtend

def test_no_weight_support_with_no_weight():
    """Smoke test: two stacks fit without ``sample_weight``.

    KNN is the meta-classifier in the first stack and a base learner in the
    second.
    """
    logit = LogisticRegression(multi_class='ovr', solver='liblinear')
    forest = RandomForestClassifier(n_estimators=10)
    bayes = GaussianNB()
    knn = KNeighborsClassifier()

    # KNN as meta-classifier.
    knn_on_top = StackingCVClassifier(classifiers=[logit, forest, bayes],
                                      meta_classifier=knn,
                                      shuffle=False)
    knn_on_top.fit(X_iris, y_iris)

    # KNN among the base learners.
    knn_in_base = StackingCVClassifier(classifiers=[logit, knn, bayes],
                                       meta_classifier=forest,
                                       shuffle=False)
    knn_in_base.fit(X_iris, y_iris)

Пример #36

0

Показать файл

Файл: test_stacking_cv_classifier.py Проект: rasbt/mlxtend

def test_sample_weight():
    """Compare predicted probabilities across three ``sample_weight`` setups.

    The same stack configuration is fitted three times: without weights,
    with all weights equal to one, and with random weights. The first two
    results must agree within 1e-3; the last two must differ by more than
    1e-3.
    """
    # with no weight given
    # NumPy's global RNG is re-seeded before each of the three fits.
    np.random.seed(123)
    meta = LogisticRegression(multi_class='ovr', solver='liblinear')
    clf1 = RandomForestClassifier(n_estimators=10)
    clf2 = GaussianNB()
    sclf = StackingCVClassifier(classifiers=[clf1, clf2],
                                meta_classifier=meta,
                                shuffle=False)
    prob1 = sclf.fit(X_iris, y_iris).predict_proba(X_iris)

    # with weight = 1
    np.random.seed(123)
    meta = LogisticRegression(multi_class='ovr', solver='liblinear')
    clf1 = RandomForestClassifier(n_estimators=10)
    clf2 = GaussianNB()
    sclf = StackingCVClassifier(classifiers=[clf1, clf2],
                                meta_classifier=meta,
                                shuffle=False)
    w = np.ones(len(y_iris))
    prob2 = sclf.fit(X_iris, y_iris,
                     sample_weight=w).predict_proba(X_iris)

    # with random weight
    # The weights come from the stdlib `random` module, seeded separately.
    random.seed(87)
    w = np.array([random.random() for _ in range(len(y_iris))])
    np.random.seed(123)
    meta = LogisticRegression(multi_class='ovr', solver='liblinear')
    clf1 = RandomForestClassifier(n_estimators=10)
    clf2 = GaussianNB()
    sclf = StackingCVClassifier(classifiers=[clf1, clf2],
                                meta_classifier=meta,
                                shuffle=False)
    prob3 = sclf.fit(X_iris, y_iris,
                     sample_weight=w).predict_proba(X_iris)

    # Largest element-wise difference between the probability matrices.
    diff12 = np.max(np.abs(prob1 - prob2))
    diff23 = np.max(np.abs(prob2 - prob3))
    assert diff12 < 1e-3, "max diff is %.4f" % diff12
    assert diff23 > 1e-3, "max diff is %.4f" % diff23

Пример #37

0

Показать файл

Файл: test_stacking_cv_classifier.py Проект: rasbt/mlxtend

def test_StackingClassifier_drop_last_proba():
    """Check the meta-feature width with and without ``drop_last_proba``.

    Two logistic-regression base learners with ``use_probas=True`` must
    give 6 columns on the full iris data, 4 columns when
    ``drop_last_proba=True``, and 2 columns when additionally only the first
    100 samples (two classes) are used for fitting.
    """
    np.random.seed(123)
    logit = LogisticRegression(solver='liblinear',
                               multi_class='ovr')
    first_two_rows = X_iris[:2]

    # Keep every probability column.
    keep_all = StackingCVClassifier(classifiers=[logit, logit],
                                    use_probas=True,
                                    drop_last_proba=False,
                                    meta_classifier=logit)
    keep_all.fit(X_iris, y_iris)
    r1 = keep_all.predict_meta_features(first_two_rows)
    assert r1.shape == (2, 6)

    # Drop the last probability column of each base learner.
    drop_last = StackingCVClassifier(classifiers=[logit, logit],
                                     use_probas=True,
                                     drop_last_proba=True,
                                     meta_classifier=logit)
    drop_last.fit(X_iris, y_iris)
    r2 = drop_last.predict_meta_features(first_two_rows)
    assert r2.shape == (2, 4), r2.shape

    # Same setting, fitted on a subset with only 2 classes.
    drop_last_binary = StackingCVClassifier(classifiers=[logit, logit],
                                            use_probas=True,
                                            drop_last_proba=True,
                                            meta_classifier=logit)
    drop_last_binary.fit(X_iris[0:100], y_iris[0:100])
    r3 = drop_last_binary.predict_meta_features(first_two_rows)
    assert r3.shape == (2, 2), r3.shape

Python StackingCVClassifier примеры использования