def test_select_fpr_classif():
    """
    Check the fpr-based univariate selector on a simple classification task.

    ``SelectFpr`` is fitted with ``f_classif`` at ``alpha=0.0001`` on a
    synthetic 20-feature dataset. The test verifies that
    ``GenericUnivariateSelect`` in ``'fpr'`` mode with the same parameter
    yields the same transformed data, and that the support mask marks
    exactly the first five features.
    """
    dataset_params = dict(
        n_samples=200,
        n_features=20,
        n_informative=3,
        n_redundant=2,
        n_repeated=0,
        n_classes=8,
        n_clusters_per_class=1,
        flip_y=0.0,
        class_sep=10,
        shuffle=False,
        random_state=0,
    )
    data, labels = make_classification(**dataset_params)

    alpha = 0.0001
    univariate_filter = SelectFpr(f_classif, alpha=alpha)
    reduced = univariate_filter.fit(data, labels).transform(data)

    generic_filter = GenericUnivariateSelect(f_classif, mode='fpr',
                                             param=alpha)
    reduced_generic = generic_filter.fit(data, labels).transform(data)

    # Both selection entry points must keep the same columns.
    assert_array_equal(reduced, reduced_generic)

    # Expected mask: ones for the first five columns, zeros for the other 15.
    expected_support = np.concatenate((np.ones(5), np.zeros(15)))
    assert_array_equal(univariate_filter.get_support(), expected_support)
def test_select_fpr_classif():
    """
    Verify fpr-heuristic univariate selection on a toy classification set.

    The dataset has 20 features. After fitting ``SelectFpr`` with
    ``f_classif`` and ``alpha=0.0001``, the test asserts that the result
    matches ``GenericUnivariateSelect(mode="fpr")`` with the same parameter
    and that only the first five features are selected.
    """
    n_features = 20
    n_expected = 5

    samples, targets = make_classification(
        n_samples=200,
        n_features=n_features,
        n_informative=3,
        n_redundant=2,
        n_repeated=0,
        n_classes=8,
        n_clusters_per_class=1,
        flip_y=0.0,
        class_sep=10,
        shuffle=False,
        random_state=0,
    )

    # Dedicated selector.
    univariate_filter = SelectFpr(f_classif, alpha=0.0001)
    fitted_filter = univariate_filter.fit(samples, targets)
    via_select_fpr = fitted_filter.transform(samples)

    # Generic selector configured for the same heuristic and threshold.
    generic = GenericUnivariateSelect(f_classif, mode="fpr", param=0.0001)
    fitted_generic = generic.fit(samples, targets)
    via_generic = fitted_generic.transform(samples)

    assert_array_equal(via_select_fpr, via_generic)

    selected_mask = univariate_filter.get_support()
    # True for the leading five columns, False elsewhere.
    expected_mask = np.arange(n_features) < n_expected
    assert_array_equal(selected_mask, expected_mask)
def test_select_fpr_regression():
    """
    Test whether the relative univariate feature selection
    gets the correct items in a simple regression problem
    with the fpr heuristic.

    ``SelectFpr`` is fitted with ``f_regression`` at ``alpha=0.01`` on a
    20-feature synthetic regression set. The test checks that
    ``GenericUnivariateSelect`` in ``"fpr"`` mode gives the same transformed
    data, that the first five features are all selected, and that fewer
    than three of the remaining fifteen features are selected.
    """
    X, Y = make_regression(n_samples=200, n_features=20, n_informative=5, shuffle=False, random_state=0)

    univariate_filter = SelectFpr(f_regression, alpha=0.01)
    X_r = univariate_filter.fit(X, Y).transform(X)
    X_r2 = GenericUnivariateSelect(f_regression, mode="fpr", param=0.01).fit(X, Y).transform(X)
    assert_array_equal(X_r, X_r2)

    support = univariate_filter.get_support()
    # Unlike the classification test, an exact mask is not asserted here:
    # the first five features must all be kept, while up to two of the
    # other features are tolerated as false positives.
    assert support[:5].all()
    assert support[5:].sum() < 3
# Example #4
# 0
def test_select_fpr_regression():
    """
    Test whether the relative univariate feature selection
    gets the correct items in a simple regression problem
    with the fpr heuristic.

    ``SelectFpr`` is fitted with ``f_regression`` at ``alpha=0.01`` on a
    20-feature synthetic regression set. The test checks that
    ``GenericUnivariateSelect`` in ``'fpr'`` mode gives the same transformed
    data, that the first five features are all selected, and that fewer
    than three of the remaining fifteen features are selected.
    """
    X, Y = make_regression(n_samples=200, n_features=20,
                           n_informative=5, shuffle=False, random_state=0)

    univariate_filter = SelectFpr(f_regression, alpha=0.01)
    X_r = univariate_filter.fit(X, Y).transform(X)
    X_r2 = GenericUnivariateSelect(f_regression, mode='fpr',
                                   param=0.01).fit(X, Y).transform(X)
    assert_array_equal(X_r, X_r2)

    support = univariate_filter.get_support()
    # No exact mask is asserted: the first five features must all be kept,
    # while up to two of the other features are tolerated as false positives.
    assert support[:5].all()
    assert support[5:].sum() < 3
# Example #5
# 0
 def fit(self, X, y):
     """Fit with ``SelectFpr.fit`` and record how many features were kept.

     After fitting, the sum of this instance's support mask is added to
     the class-level counter ``MySelectFpr.total_n_feats``, so the counter
     accumulates over every ``fit`` call.

     Returns ``self``.
     """
     SelectFpr.fit(self, X, y)
     n_selected = self.get_support().sum()
     MySelectFpr.total_n_feats += n_selected
     return self