def test_check_pandas_dataframe_fit():

    knn = KNeighborsClassifier(n_neighbors=4)
    iris = load_iris()
    X = iris.data
    y = iris.target
    efs1 = EFS(knn,
               min_features=2,
               max_features=2,
               scoring='accuracy',
               cv=0,
               clone_estimator=False,
               print_progress=False,
               n_jobs=1)

    df = pd.DataFrame(X, columns=['sepal length', 'sepal width',
                                  'petal length', 'petal width'])

    efs1 = efs1.fit(X, y)
    assert efs1.best_idx_ == (2, 3), efs1.best_idx_
    assert efs1.best_feature_names_ == ('2', '3')
    assert efs1.interrupted_ is False

    efs1._TESTING_INTERRUPT_MODE = True
    efs1 = efs1.fit(df, y)
    assert efs1.best_idx_ == (0, 1), efs1.best_idx_
    assert efs1.best_feature_names_ == ('sepal length', 'sepal width')
    assert efs1.interrupted_ is True
def test_knn_cv3_groups():
    iris = load_iris()
    X = iris.data
    y = iris.target
    knn = KNeighborsClassifier(n_neighbors=4)
    efs1 = EFS(knn,
               min_features=3,
               max_features=3,
               scoring='accuracy',
               cv=GroupKFold(n_splits=3),
               print_progress=False)
    np.random.seed(1630672634)
    groups = np.random.randint(0, 6, size=len(y))
    efs1 = efs1.fit(X, y, groups=groups)
    # print(efs1.subsets_)
    expect = {0: {'cv_scores': np.array([0.97916667, 0.93877551, 0.9245283]),
                  'feature_idx': (0, 1, 2),
                  'avg_score': 0.9474901595858469,
                  'feature_names': ('0', '1', '2')},
              1: {'cv_scores': np.array([1., 0.93877551, 0.9245283]),
                  'feature_idx': (0, 1, 3),
                  'avg_score': 0.9544346040302915,
                  'feature_names': ('0', '1', '3')},
              2: {'cv_scores': np.array([0.97916667, 0.95918367, 0.9245283]),
                  'feature_idx': (0, 2, 3),
                  'avg_score': 0.9542928806742822,
                  'feature_names': ('0', '2', '3')},
              3: {'cv_scores': np.array([0.97916667, 0.95918367, 0.94339623]),
                  'feature_idx': (1, 2, 3),
                  'avg_score': 0.9605821888503829,
                  'feature_names': ('1', '2', '3')}}
    dict_compare_utility(d1=expect, d2=efs1.subsets_)
def test_knn_cv3():
    iris = load_iris()
    X = iris.data
    y = iris.target
    knn = KNeighborsClassifier(n_neighbors=4)
    efs1 = EFS(knn,
               min_features=3,
               max_features=3,
               scoring='accuracy',
               cv=4,
               print_progress=False)
    efs1 = efs1.fit(X, y)
    expect = {0: {'avg_score': 0.9391025641025641,
                  'feature_idx': (0, 1, 2),
                  'cv_scores': np.array([0.97435897, 0.94871795,
                                         0.88888889, 0.94444444])},
              1: {'avg_score': 0.94017094017094016,
                  'feature_idx': (0, 1, 3),
                  'cv_scores': np.array([0.92307692, 0.94871795,
                                         0.91666667, 0.97222222])},
              2: {'avg_score': 0.95299145299145294,
                  'feature_idx': (0, 2, 3),
                  'cv_scores': np.array([0.97435897, 0.94871795,
                                         0.91666667, 0.97222222])},
              3: {'avg_score': 0.97275641025641035,
                  'feature_idx': (1, 2, 3),
                  'cv_scores': np.array([0.97435897, 1.,
                                         0.94444444, 0.97222222])}}
    dict_compare_utility(d1=expect, d2=efs1.subsets_)
    assert efs1.best_idx_ == (1, 2, 3)
    assert round(efs1.best_score_, 4) == 0.9728
def test_knn_wo_cv():
    iris = load_iris()
    X = iris.data
    y = iris.target
    knn = KNeighborsClassifier(n_neighbors=4)
    efs1 = EFS(knn,
               min_features=2,
               max_features=3,
               scoring='accuracy',
               cv=0,
               print_progress=False)
    efs1 = efs1.fit(X, y)
    expect = {0: {'feature_idx': (0, 1),
                  'feature_names': ('0', '1'),
                  'avg_score': 0.82666666666666666,
                  'cv_scores': np.array([0.82666667])},
              1: {'feature_idx': (0, 2),
                  'feature_names': ('0', '2'),
                  'avg_score': 0.95999999999999996,
                  'cv_scores': np.array([0.96])},
              2: {'feature_idx': (0, 3),
                  'feature_names': ('0', '3'),
                  'avg_score': 0.96666666666666667,
                  'cv_scores': np.array([0.96666667])},
              3: {'feature_idx': (1, 2),
                  'feature_names': ('1', '2'),
                  'avg_score': 0.95999999999999996,
                  'cv_scores': np.array([0.96])},
              4: {'feature_idx': (1, 3),
                  'feature_names': ('1', '3'),
                  'avg_score': 0.95999999999999996,
                  'cv_scores': np.array([0.96])},
              5: {'feature_idx': (2, 3),
                  'feature_names': ('2', '3'),
                  'avg_score': 0.97333333333333338,
                  'cv_scores': np.array([0.97333333])},
              6: {'feature_idx': (0, 1, 2),
                  'feature_names': ('0', '1', '2'),
                  'avg_score': 0.95999999999999996,
                  'cv_scores': np.array([0.96])},
              7: {'feature_idx': (0, 1, 3),
                  'feature_names': ('0', '1', '3'),
                  'avg_score': 0.96666666666666667,
                  'cv_scores': np.array([0.96666667])},
              8: {'feature_idx': (0, 2, 3),
                  'feature_names': ('0', '2', '3'),
                  'avg_score': 0.96666666666666667,
                  'cv_scores': np.array([0.96666667])},
              9: {'feature_idx': (1, 2, 3),
                  'feature_names': ('1', '2', '3'),
                  'avg_score': 0.97333333333333338,
                  'cv_scores': np.array([0.97333333])}}
    dict_compare_utility(d1=expect, d2=efs1.subsets_)
def test_regression():
    boston = load_boston()
    X, y = boston.data[:, [1, 2, 6, 8, 12]], boston.target
    lr = LinearRegression()
    efs_r = EFS(lr,
                min_features=3,
                max_features=4,
                scoring='neg_mean_squared_error',
                cv=10,
                print_progress=False)
    efs_r = efs_r.fit(X, y)
    assert efs_r.best_idx_ == (0, 2, 4)
    assert round(efs_r.best_score_, 4) == -40.8777
Example #8
def test_fit_params():
    iris = load_iris()
    X = iris.data
    y = iris.target
    sample_weight = np.ones(X.shape[0])
    forest = RandomForestClassifier(n_estimators=100, random_state=123)
    efs1 = EFS(forest,
               min_features=3,
               max_features=3,
               scoring='accuracy',
               cv=4,
               print_progress=False)
    efs1 = efs1.fit(X, y, sample_weight=sample_weight)
    expect = {0: {'feature_idx': (0, 1, 2),
                  'feature_names': ('0', '1', '2'),
                  'cv_scores': np.array([0.947, 0.868, 0.919, 0.973]),
                  'avg_score': 0.9269203413940257},
              1: {'feature_idx': (0, 1, 3),
                  'feature_names': ('0', '1', '3'),
                  'cv_scores': np.array([0.921, 0.921, 0.892, 1.]),
                  'avg_score': 0.9337606837606838},
              2: {'feature_idx': (0, 2, 3),
                  'feature_names': ('0', '2', '3'),
                  'cv_scores': np.array([0.974, 0.947, 0.919, 0.973]),
                  'avg_score': 0.9532361308677098},
              3: {'feature_idx': (1, 2, 3),
                  'feature_names': ('1', '2', '3'),
                  'cv_scores': np.array([0.974, 0.947, 0.892, 1.]),
                  'avg_score': 0.9532361308677098}}

    if Version(sklearn_version) < Version("0.22"):
        expect[0]['avg_score'] = 0.9401709401709402
        expect[0]['cv_scores'] = np.array([0.94871795, 0.92307692,
                                           0.91666667, 0.97222222])
        expect[1]['cv_scores'] = np.array([0.94871795, 0.92307692,
                                           0.91666667, 0.97222222])
        expect[2]['cv_scores'] = np.array([0.94871795, 0.92307692,
                                           0.91666667, 0.97222222])
        expect[2]['avg_score'] = 0.9599358974358974
        expect[3]['avg_score'] = 0.9599358974358974
        expect[3]['cv_scores'] = np.array([0.97435897, 0.94871795,
                                           0.91666667, 1.])
        assert round(efs1.best_score_, 4) == 0.9599

    else:
        assert round(efs1.best_score_, 4) == 0.9532

    dict_compare_utility(d1=expect, d2=efs1.subsets_)
    assert efs1.best_idx_ == (0, 2, 3)
def test_clone_params_pass():
    iris = load_iris()
    X = iris.data
    y = iris.target
    lr = SoftmaxRegression(random_seed=1)
    efs1 = EFS(lr,
               min_features=2,
               max_features=2,
               scoring='accuracy',
               cv=0,
               clone_estimator=False,
               print_progress=False,
               n_jobs=1)
    efs1 = efs1.fit(X, y)
    assert(efs1.best_idx_ == (1, 3))
Example #11
def test_knn_cv3():
    iris = load_iris()
    X = iris.data
    y = iris.target
    knn = KNeighborsClassifier(n_neighbors=4)
    efs1 = EFS(knn,
               min_features=3,
               max_features=3,
               scoring='accuracy',
               cv=4,
               print_progress=False)
    efs1 = efs1.fit(X, y)
    expect = {0: {'avg_score': 0.9391025641025641,
                  'feature_idx': (0, 1, 2),
                  'feature_names': ('0', '1', '2'),
                  'cv_scores': np.array([0.974, 0.947, 0.892, 0.946])},
              1: {'avg_score': 0.9400782361308677,
                  'feature_idx': (0, 1, 3),
                  'feature_names': ('0', '1', '3'),
                  'cv_scores': np.array([0.921, 0.947, 0.919, 0.973])},
              2: {'avg_score': 0.95299145299145294,
                  'feature_idx': (0, 2, 3),
                  'feature_names': ('0', '2', '3'),
                  'cv_scores': np.array([0.974, 0.947, 0.919, 0.973])},
              3: {'avg_score': 0.97275641025641035,
                  'feature_idx': (1, 2, 3),
                  'feature_names': ('1', '2', '3'),
                  'cv_scores': np.array([0.974, 1.   , 0.946, 0.973])}}

    if Version(sklearn_version) < Version("0.22"):
        expect[0]['cv_scores'] = np.array([0.97435897, 0.94871795,
                                           0.88888889, 0.94444444])
        expect[1]['cv_scores'] = np.array([0.92307692, 0.94871795,
                                           0.91666667, 0.97222222])
        expect[2]['cv_scores'] = np.array([0.97435897, 0.94871795,
                                           0.91666667, 0.97222222])
        expect[3]['cv_scores'] = np.array([0.97435897, 0.94871795,
                                           0.91666667, 0.97222222])
        expect[1]['avg_score'] = 0.94017094017094016
        assert round(efs1.best_score_, 4) == 0.9728
    else:
        assert round(efs1.best_score_, 4) == 0.9732

    dict_compare_utility(d1=expect, d2=efs1.subsets_)
    assert efs1.best_idx_ == (1, 2, 3)
    assert efs1.best_feature_names_ == ('1', '2', '3')
def test_fit_transform():
    iris = load_iris()
    X = iris.data
    y = iris.target
    knn = KNeighborsClassifier(n_neighbors=4)

    efs1 = EFS(knn,
               min_features=2,
               max_features=2,
               scoring='accuracy',
               cv=0,
               clone_estimator=False,
               print_progress=False,
               n_jobs=1)

    X_t = efs1.fit_transform(X, y)
    assert X_t.shape == (150, 2)
def test_knn_cv3():
    iris = load_iris()
    X = iris.data
    y = iris.target
    knn = KNeighborsClassifier(n_neighbors=4)
    efs1 = EFS(knn,
               min_features=3,
               max_features=3,
               scoring='accuracy',
               cv=4,
               print_progress=False)
    efs1 = efs1.fit(X, y)
    expect = {
        0: {
            'avg_score': 0.9391025641025641,
            'feature_idx': (0, 1, 2),
            'feature_names': ('0', '1', '2'),
            'cv_scores':
            np.array([0.97435897, 0.94871795, 0.88888889, 0.94444444])
        },
        1: {
            'avg_score': 0.94017094017094016,
            'feature_idx': (0, 1, 3),
            'feature_names': ('0', '1', '3'),
            'cv_scores':
            np.array([0.92307692, 0.94871795, 0.91666667, 0.97222222])
        },
        2: {
            'avg_score': 0.95299145299145294,
            'feature_idx': (0, 2, 3),
            'feature_names': ('0', '2', '3'),
            'cv_scores':
            np.array([0.97435897, 0.94871795, 0.91666667, 0.97222222])
        },
        3: {
            'avg_score': 0.97275641025641035,
            'feature_idx': (1, 2, 3),
            'feature_names': ('1', '2', '3'),
            'cv_scores': np.array([0.97435897, 1., 0.94444444, 0.97222222])
        }
    }
    dict_compare_utility(d1=expect, d2=efs1.subsets_)
    assert efs1.best_idx_ == (1, 2, 3)
    assert efs1.best_feature_names_ == ('1', '2', '3')
    assert round(efs1.best_score_, 4) == 0.9728
def perform_efs(curr_model, X, y, min_cols, max_cols):

    efs1 = EFS(curr_model,
               min_features=min_cols,
               max_features=max_cols,
               print_progress=True,
               scoring='accuracy',
               cv=5,
               n_jobs=-1)

    efs1 = efs1.fit(X, y)

    df = pd.DataFrame.from_dict(efs1.get_metric_dict()).T
    #    df['test_acc'] = df['feature_idx'].apply(
    #        lambda x: make_predictions_on_test(efs1, curr_model, X_train, X_test, y_train, y_test, x)
    #    )

    return df
Example #16
def test_custom_feature_names():
    knn = KNeighborsClassifier(n_neighbors=4)
    iris = load_iris()
    X = iris.data
    y = iris.target
    efs1 = EFS(knn,
               min_features=2,
               max_features=2,
               scoring='accuracy',
               cv=0,
               clone_estimator=False,
               print_progress=False,
               n_jobs=1)

    efs1 = efs1.fit(X, y, custom_feature_names=(
          'sepal length', 'sepal width', 'petal length', 'petal width'))
    assert efs1.best_idx_ == (2, 3), efs1.best_idx_
    assert efs1.best_feature_names_ == ('petal length', 'petal width')
Example #18
def ExhaustiveFeatureSelector(X, y, min_features=1, max_features=4):
    from mlxtend.feature_selection import ExhaustiveFeatureSelector as EFS
    from sklearn.linear_model import LinearRegression
    lr = LinearRegression()

    efs1 = EFS(lr,
               min_features=min_features,
               max_features=max_features,
               scoring='r2',
               print_progress=True,
               cv=5)

    efs1 = efs1.fit(X, y)

    #print('Best subset:', efs1.best_idx_)
    print('Best subset (corresponding names):', efs1.best_feature_names_)
    print('Best R² score: %.2f' % efs1.best_score_)
    return efs1.best_feature_names_, efs1.best_score_
def test_fit_params():
    iris = load_iris()
    X = iris.data
    y = iris.target
    sample_weight = np.ones(X.shape[0])
    forest = RandomForestClassifier(n_estimators=100, random_state=123)
    efs1 = EFS(forest,
               min_features=3,
               max_features=3,
               scoring='accuracy',
               cv=4,
               print_progress=False)
    efs1 = efs1.fit(X, y, sample_weight=sample_weight)
    expect = {
        0: {
            'feature_idx': (0, 1, 2),
            'feature_names': ('0', '1', '2'),
            'cv_scores':
            np.array([0.94871795, 0.92307692, 0.91666667, 0.97222222]),
            'avg_score': 0.9401709401709402
        },
        1: {
            'feature_idx': (0, 1, 3),
            'feature_names': ('0', '1', '3'),
            'cv_scores': np.array([0.92307692, 0.92307692, 0.88888889, 1.]),
            'avg_score': 0.9337606837606838
        },
        2: {
            'feature_idx': (0, 2, 3),
            'feature_names': ('0', '2', '3'),
            'cv_scores':
            np.array([0.97435897, 0.94871795, 0.94444444, 0.97222222]),
            'avg_score': 0.9599358974358974
        },
        3: {
            'feature_idx': (1, 2, 3),
            'feature_names': ('1', '2', '3'),
            'cv_scores': np.array([0.97435897, 0.94871795, 0.91666667, 1.]),
            'avg_score': 0.9599358974358974
        }
    }
    dict_compare_utility(d1=expect, d2=efs1.subsets_)
    assert efs1.best_idx_ == (0, 2, 3)
    assert round(efs1.best_score_, 4) == 0.9599
def test_check_pandas_dataframe_transform():
    knn = KNeighborsClassifier(n_neighbors=4)
    iris = load_iris()
    X = iris.data
    y = iris.target
    efs1 = EFS(knn,
               min_features=2,
               max_features=2,
               scoring='accuracy',
               cv=0,
               clone_estimator=False,
               print_progress=False,
               n_jobs=1)

    df = pd.DataFrame(X, columns=['sepal length', 'sepal width',
                                  'petal length', 'petal width'])
    efs1 = efs1.fit(df, y)
    assert efs1.best_idx_ == (2, 3)
    assert (150, 2) == efs1.transform(df).shape
Example #22
def test_knn_cv3_groups():
    iris = load_iris()
    X = iris.data
    y = iris.target
    knn = KNeighborsClassifier(n_neighbors=4)
    efs1 = EFS(knn,
               min_features=3,
               max_features=3,
               scoring='accuracy',
               cv=GroupKFold(n_splits=3),
               print_progress=False)
    np.random.seed(1630672634)
    groups = np.random.randint(0, 6, size=len(y))
    efs1 = efs1.fit(X, y, groups=groups)

    expect = {
        0: {
            'cv_scores': np.array([0.97916667, 0.93877551, 0.9245283]),
            'feature_idx': (0, 1, 2),
            'avg_score': 0.9474901595858469,
            'feature_names': ('0', '1', '2')
        },
        1: {
            'cv_scores': np.array([1., 0.93877551, 0.9245283]),
            'feature_idx': (0, 1, 3),
            'avg_score': 0.9544346040302915,
            'feature_names': ('0', '1', '3')
        },
        2: {
            'cv_scores': np.array([0.97916667, 0.95918367, 0.9245283]),
            'feature_idx': (0, 2, 3),
            'avg_score': 0.9542928806742822,
            'feature_names': ('0', '2', '3')
        },
        3: {
            'cv_scores': np.array([0.97916667, 0.95918367, 0.94339623]),
            'feature_idx': (1, 2, 3),
            'avg_score': 0.9605821888503829,
            'feature_names': ('1', '2', '3')
        }
    }
    dict_compare_utility(d1=expect, d2=efs1.subsets_)
Example #23
def wrapper_selection():
    print('--------------------------------------------------------')
    print('Using the wrapper technique...')
    models = [
        svm.SVC(),
        RandomForestClassifier(),
        GaussianNB(),
        LogisticRegression(),
        KNeighborsClassifier()
    ]
    for model in models:
        efs = ExhaustiveFeatureSelector(model,
                                        min_features=1,
                                        max_features=5,
                                        scoring='accuracy',
                                        cv=5)
        efs = efs.fit(data, labels)
        selected_features = columns[list(efs.best_idx_)]
        print(
            f'Selected features using {model}: {selected_features}')
Example #24
def wrapper(x_train_df):
    x_train = x_train_df.drop(["id", "failed test"], axis=1)
    y_train = x_train_df["failed test"]
    feature_selector = EFS(RandomForestClassifier(max_depth=17,
                                                  n_estimators=136,
                                                  max_features=0.307,
                                                  min_samples_split=30,
                                                  random_state=42),
                           min_features=6,
                           max_features=7,
                           scoring='neg_log_loss',
                           print_progress=True,
                           n_jobs=1,
                           cv=5)
    features = feature_selector.fit(x_train, y_train)
    print('Best neg_log_loss score: %.2f' % feature_selector.best_score_)
    print('Best subset (indices):', feature_selector.best_idx_)
    print('Best subset (corresponding names):',
          feature_selector.best_feature_names_)
    print('Subsets_: ', feature_selector.subsets_)
Example #25
def exhaustive_feature_selection(x_data, y_data, min_feat, max_feat):
    print(f"Applying exhaustive feature selection to numeric data")
    print(
        f"cat variables before backward feature selection {x_data.select_dtypes(include='object').columns.shape}"
    )
    print(
        f"numeric variables before backward feature selection {x_data.select_dtypes(include='number').columns.shape}"
    )

    numeric_cols = x_data.select_dtypes(include='number').columns

    temp = x_data[numeric_cols]

    efs = EFS(RandomForestRegressor(n_jobs=4),
              max_features=max_feat,
              min_features=min_feat,
              scoring='r2',
              print_progress=True,
              cv=2)

    efs.fit(temp, y_data)

    idx = efs.best_idx_

    print(idx)

    idx = list(idx)

    cols_to_keep = numeric_cols[idx]
    cols_to_drop = [x for x in numeric_cols if x not in cols_to_keep]

    print(len(cols_to_drop))

    x_data.drop(labels=cols_to_drop, axis=1, inplace=True)
    print(
        f"cat variables after exhaustive feature selection {x_data.select_dtypes(include='object').columns}"
    )
    print(
        f"numeric variables after exhaustive  feature selection {x_data.select_dtypes(include='number').columns}"
    )
    return x_data
    def create_data(X: dt.Frame = None) -> pd.DataFrame:
        if X is None:
            return []

        from mlxtend.feature_selection import ExhaustiveFeatureSelector as EFS

        X = X.to_pandas()
        y = X[TARGET_COLUMN].values
        X.drop(TARGET_COLUMN, axis=1, inplace=True)

        efs = EFS(ESTIMATOR,
                  min_features=MIN_FEATURES,
                  max_features=MAX_FEATURES,
                  scoring=SCORING,
                  cv=CV,
                  n_jobs=-1)

        efs.fit(X, y)

        X_fs = X.iloc[:, list(efs.best_idx_)]

        return X_fs
Example #27
def exhaustive_feature_selection(x_train, y_train, model=None, num_features=[2, 5], classification_tasks=True,
                                 scoring=None):
    print("============== Exhaustive feature selection ===================")
    if not model:
        if classification_tasks:
            model = LogisticRegression(multi_class='multinomial',
                                       solver='lbfgs',
                                       random_state=123)
        else:
            model = Ridge()

    if not scoring:
        if classification_tasks:
            scoring = "accuracy"
        else:
            scoring = "neg_mean_absolute_error"

    efs = EFS(estimator=model,
               min_features=num_features[0],
               max_features=num_features[1],
               scoring=scoring,
               print_progress=False,
               clone_estimator=False,
               cv=10,
               n_jobs=2)

    efs = efs.fit(x_train.values, y_train.values)
    print('Best %s score: %.2f' % (scoring, efs.best_score_))
    col_list = []
    col_list.extend(efs.best_idx_)
    col_names = x_train.columns
    print('Best subset:', col_names[col_list].values)
    x_train = x_train.iloc[:,col_list]

    print("=================================")
    return x_train
def test_fit_params():
    iris = load_iris()
    X = iris.data
    y = iris.target
    sample_weight = np.ones(X.shape[0])
    forest = RandomForestClassifier(n_estimators=100, random_state=123)
    efs1 = EFS(forest,
               min_features=3,
               max_features=3,
               scoring='accuracy',
               cv=4,
               print_progress=False)
    efs1 = efs1.fit(X, y, sample_weight=sample_weight)
    expect = {0: {'feature_idx': (0, 1, 2),
                  'feature_names': ('0', '1', '2'),
                  'cv_scores': np.array([0.94871795, 0.92307692,
                                         0.91666667, 0.97222222]),
                  'avg_score': 0.9401709401709402},
              1: {'feature_idx': (0, 1, 3),
                  'feature_names': ('0', '1', '3'),
                  'cv_scores': np.array([0.92307692, 0.92307692,
                                         0.88888889, 1.]),
                  'avg_score': 0.9337606837606838},
              2: {'feature_idx': (0, 2, 3),
                  'feature_names': ('0', '2', '3'),
                  'cv_scores': np.array([0.97435897, 0.94871795,
                                         0.94444444, 0.97222222]),
                  'avg_score': 0.9599358974358974},
              3: {'feature_idx': (1, 2, 3),
                  'feature_names': ('1', '2', '3'),
                  'cv_scores': np.array([0.97435897, 0.94871795,
                                         0.91666667, 1.]),
                  'avg_score': 0.9599358974358974}}
    dict_compare_utility(d1=expect, d2=efs1.subsets_)
    assert efs1.best_idx_ == (0, 2, 3)
    assert round(efs1.best_score_, 4) == 0.9599
Example #29
    def brute_force(self, X, y, y_type):
        if y_type == "binary":
            est = LinearRegression()
            efs = EFS(
                estimator=est,
                min_features=1,
                max_features=2,
                scoring="neg_mean_squared_error",
                cv=5,
            )

            efs = efs.fit(X, y)
            efs_df = pd.DataFrame.from_dict(efs.get_metric_dict()).T
            efs_df.sort_values("avg_score", inplace=True, ascending=False)

        else:
            est = LogisticRegression()
            efs = EFS(
                estimator=est,
                min_features=1,
                max_features=2,
                scoring="neg_mean_squared_error",
                cv=5,
            )

            efs = efs.fit(X, y)
            efs_df = pd.DataFrame.from_dict(efs.get_metric_dict()).T
            efs_df.sort_values("avg_score", inplace=True, ascending=False)

        # horizontal bar chart
        fig, ax = plt.subplots(figsize=(12, 9))
        y_pos = np.arange(len(efs_df))
        ax.barh(y_pos, efs_df["avg_score"], xerr=efs_df["std_dev"])
        ax.set_yticks(y_pos)
        ax.set_xlabel("Avg Score")
        ax.set_ylabel("Feature Names")
        ax.tick_params(labelleft=False)
        plt.show()

        return efs_df
Example #30
X = pd.DataFrame(
    sc.fit_transform(newdata.drop(['Loan_ID', 'Loan_Status'], axis=1)),
    columns=[
        'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount',
        'Loan_Amount_Term', 'Credit_History', 'Property_Area', 'Gender_Male',
        'Married_Yes', 'Dependents_1', 'Dependents_2', 'Dependents_3+',
        'Education_Not Graduate', 'Self_Employed_Yes'
    ])
# Target class
Y = pd.DataFrame(newdata['Loan_Status'])
#Visualization
sns.pairplot(df1, hue="Loan_Status")
#Splitting data into train and test samples
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)
#Running ExhaustiveFeatureSelector() for feature_selection on 3 different classifiers
efs = ExhaustiveFeatureSelector(RandomForestClassifier(),
                                max_features=6,
                                scoring='roc_auc',
                                cv=5)
efs_fit = efs.fit(X_train, Y_train)
selected_features = X_train.columns[list(efs_fit.best_idx_)]
print(selected_features)
print(efs_fit.best_score_)
rClassifier = RandomForestClassifier(random_state=0)
rClassifier.fit(X_train[selected_features], Y_train)
Y_RCF = rClassifier.predict(X_test[selected_features])
print(classification_report(Y_test, Y_RCF))
efs_naive = ExhaustiveFeatureSelector(GaussianNB(),
                                      max_features=6,
                                      scoring='roc_auc',
                                      cv=4)
efs_naive_fit = efs_naive.fit(X_train, Y_train)
selected_features_naive = X_train.columns[list(efs_naive_fit.best_idx_)]
Example #31
print('ROC AUC on training set: {}'.format(
    roc_auc_score(train_labels, train_pred[:, 1])))

test_pred = clf.predict_proba(test_features[filtered_features].fillna(0))
print('ROC AUC on test set: {}'.format(
    roc_auc_score(test_labels, test_pred[:, 1])))

##################################### XGBoost - Exhaustive feature Selector ############################################

from mlxtend.feature_selection import ExhaustiveFeatureSelector
from xgboost import XGBClassifier
from sklearn.metrics import roc_auc_score

feature_selector = ExhaustiveFeatureSelector(XGBClassifier(),
                                             min_features=2,
                                             max_features=10,
                                             scoring='roc_auc',
                                             print_progress=True,
                                             cv=2)

features = feature_selector.fit(np.array(train_features.fillna(0)),
                                train_labels)

print(type(features))
filtered_features = train_features.columns[list(features.best_idx_)]
print(filtered_features)

# see result of XGBoost:

clf = XGBClassifier(min_child_weight=5,
                    gamma=0.5,
                    subsample=0.6,
    def __init__(self, columns=None, **kwargs):
        self.columns = columns
        self.selector = ExhaustiveFeatureSelector(**kwargs)
        self.transform_cols = None
        self.stat_df = None
correlated_features = set()
correlation_matrix = paribas_data.corr()
for i in range(len(correlation_matrix.columns)):
    for j in range(i):
        if abs(correlation_matrix.iloc[
                i, j]) > 0.8:  #0.8 is a correlation threshold value
            column_name = correlation_matrix.columns[i]
            correlated_features.add(column_name)

train_features.drop(labels=correlated_features, axis=1, inplace=True)
test_features.drop(labels=correlated_features, axis=1, inplace=True)

feature_selector = ExhaustiveFeatureSelector(RandomForestClassifier(n_jobs=-1),
                                             min_features=2,
                                             max_features=4,
                                             scoring='roc_auc',
                                             print_progress=True,
                                             cv=2)
features = feature_selector.fit(np.array(train_features.fillna(0)),
                                train_labels)

filtered_features = train_features.columns[list(features.best_idx_)]

clf = RandomForestClassifier(n_estimators=100, random_state=41, max_depth=3)
clf.fit(train_features[filtered_features].fillna(0), train_labels)

train_pred = clf.predict_proba(train_features[filtered_features].fillna(0))
print('ROC AUC on training set: {}'.format(
    roc_auc_score(train_labels, train_pred[:, 1])))

test_pred = clf.predict_proba(test_features[filtered_features].fillna(0))
that employs a search strategy to explore the space of possible feature subsets,
evaluating each subset by the performance of a given algorithm.
"""
"""
EXHAUSTIVE FEATURE SELECTION.
This method searches across all possible feature combinations.
Its aim is to find the best performing feature subset.
"""
# import the algorithm you want to evaluate on your features.
from mlxtend.feature_selection import ExhaustiveFeatureSelector
from sklearn.ensemble import RandomForestClassifier

# create the ExhaustiveFeatureSelector object.
efs = ExhaustiveFeatureSelector(RandomForestClassifier(),
                                min_features=45,
                                max_features=70,
                                scoring='accuracy',
                                cv=2)

# fit the object to the training data.
efs = efs.fit(x, y)

# print the selected features.
selected_features1 = x.columns[list(efs.best_idx_)]
print('selected features from exhaustive selection:', selected_features1)

# print the final prediction score.
print('accuracy:', efs.best_score_)

# transform to the newly selected features.
#X_train = efs.transform(X_train)
"""
mlxtend's ExhaustiveFeatureSelector selects the best subset of features from all possible combinations of the features.
link: http://rasbt.github.io/mlxtend/user_guide/feature_selection/ExhaustiveFeatureSelector/
"""
from mlxtend.feature_selection import ExhaustiveFeatureSelector as EFS
efs_1 = EFS(knn,
            min_features=1,
            max_features=4,
            scoring='accuracy',
            print_progress=True,
            cv=5)  # where knn is the model

efs_1 = efs_1.fit(X, y)  # fit on the training data before querying results
efs_1.get_metric_dict()
print('Selected features:', efs_1.best_idx_)
"""
Sequential feature selection is computationally cheaper than EFS; however, it should not be combined with embedded feature selection methods such as LASSO.
Compared to RFE, it is more computationally intensive because it relies on a scoring metric for feature selection,
whereas RFE relies on weight coefficients (linear models) or feature importances (tree-based algorithms).

There are 4 different flavors of SFAs available via the SequentialFeatureSelector
(each is mapped to the forward/floating parameters in the sketch after this docstring):

Sequential Forward Selection (SFS)
Sequential Backward Selection (SBS)
Sequential Forward Floating Selection (SFFS)
Sequential Backward Floating Selection (SBFS)
link: http://rasbt.github.io/mlxtend/user_guide/feature_selection/SequentialFeatureSelector/
"""
from mlxtend.feature_selection import SequentialFeatureSelector as SFS

sfs1 = SFS(knn,
           k_features=3,
Example #36
    print("The selected feature list:")
    print(feat_cols)
elif (choice == 2):
    sfs1 = sfs(clf,
               k_features=4,
               forward=False,
               floating=False,
               verbose=2,
               scoring='accuracy',
               cv=5)
    # Perform SBS (sequential backward selection: forward=False, floating=False)
    sfs1 = sfs1.fit(X_train, y_train)

    feat_cols = list(sfs1.k_feature_idx_)
    print("******")
    print("The selected feature list:")
    print(feat_cols)
elif (choice == 3):
    efs1 = EFS(knn,
               min_features=4,
               max_features=5,
               scoring='accuracy',
               print_progress=True,
               cv=5)
    efs1 = efs1.fit(X_train, y_train)
    feat_cols = list(efs1.best_idx_)
    print("******")
    print("The selected feature list:")
    print(feat_cols)
else:
    print("Wrong Input")
Example #37
from mlxtend.feature_selection import ExhaustiveFeatureSelector as EFS
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt

#%% load sample data
iris = load_iris()
x = pd.DataFrame(iris.data, \
    columns=iris.feature_names)

#%% create a logistic regression object
lr = LogisticRegression()

#%% create an EFS object
efs = EFS(estimator=lr,        
          min_features=1,      
          max_features=3,      
          scoring='accuracy',  
          cv=5)

#%% fit the model
efs = efs.fit(x, iris.target)

#%% show the selected features
efs.best_feature_names_
# console output:
# ('sepal length (cm)', 'petal length (cm)', 
# 'petal width (cm)')

#%% show a full report on the feature selection
efs_results = pd.DataFrame(efs.get_metric_dict()).\
    T. \
Example #38
def correlation(dataset, threshold):
    # collect columns whose correlation with an earlier column exceeds the threshold
    col_corr = set()
    corr_matrix = dataset.corr()
    for i in range(len(corr_matrix.columns)):
        for j in range(i):
            if abs(corr_matrix.iloc[i, j]) > threshold:
                colname = corr_matrix.columns[i]
                col_corr.add(colname)
    return col_corr

corr_features = correlation(X_train, 0.8)
print('correlated features:', len(set(corr_features)))

X_train.drop(labels=corr_features, axis=1, inplace=True)
X_test.drop(labels=corr_features, axis=1, inplace=True)

efs1 = EFS(RandomForestClassifier(n_jobs=4, random_state=0),
           min_features=1,
           max_features=4,
           scoring='roc_auc',
           print_progress=True,
           cv=2)

efs1 = efs1.fit(np.array(X_train[X_train.columns[0:4]].fillna(0)), y_train)
select_feat = X_train.columns[list(efs1.best_idx_)]
select_feat

def run_randomForests(X_train, X_test, y_train, y_test):
    rf = RandomForestClassifier(n_estimators=200, random_state=39, max_depth=4)
    rf.fit(X_train, y_train)
    print('Train set')
    pred = rf.predict_proba(X_train)
    print('Random Forests roc_auc: {}'.format(roc_auc_score(y_train, pred[:, 1])))
    print('Test set')
Example #39
df = df.sort_values(by=['importances'])
print('\n\n')

for feature_choices in [10, 20, 30, 40, 50]:
    for max_len in [5, 10]:

        these_choices = df.tail(feature_choices)
        #print(these_choices)
        #print(df)
        test_cols = these_choices['feature'].values
        print(test_cols)
        efs = EFS(
            estimator=rfc,
            min_features=3,
            max_features=max_len,
            print_progress=False,
            scoring='accuracy',
            n_jobs=15,
            cv=4,
        )

        start_time = time.time()
        try:
            efs = efs.fit(X_train[test_cols], y_train)
        except Exception:
            # skip feature subsets that fail to fit
            continue
        end_time = time.time()
        #print()
        #print(feature_choices, end_time - start_time)
        best_features = list(efs.best_feature_names_)
        best_score = efs.best_score_
Example #40
num_chunks = pd.read_csv("train_numeric.csv",
                         index_col=0,
                         usecols=list(range(969)),
                         chunksize=100000,
                         dtype=np.float32)
X = pd.concat([
    pd.concat([dchunk, nchunk], axis=1).sample(frac=0.05)
    for dchunk, nchunk in zip(date_chunks, num_chunks)
])
y = pd.read_csv("train_numeric.csv",
                index_col=0,
                usecols=[0, 969],
                dtype=np.float32).loc[X.index].values.ravel()
X = X.values
model = XGBClassifier()
efs1 = EFS(model, min_features=100, max_features=900, scoring='accuracy', cv=5)

efs1 = efs1.fit(X, y)

print('Best accuracy score: %.2f' % efs1.best_score_)
important_indices = efs1.best_idx_

# Got important_indices from above code
#important_indices = []
print("Found important features %s" % important_indices)
# load entire dataset for these features.
# note where the feature indices are split so we can load the correct ones straight from read_csv
n_date_features = 1156
X = np.concatenate([
    pd.read_csv("train_date.csv",
                index_col=0,