Пример #1
0
def test_few_runs():
    """Run the selector on numpy input with very few selection runs.

    A fresh ``FeatureSelector`` is fitted with ``featsel_runs=0`` and then
    with ``featsel_runs=1``; each transformed result must have exactly
    3 columns.
    """
    data, y = get_random_data()
    # Zero runs first, then a single run; each gets its own selector instance.
    for n_runs in (0, 1):
        selector = FeatureSelector(verbose=0, featsel_runs=n_runs)
        selected = selector.fit_transform(data, y)
        n_selected = selected.shape[1]
        assert n_selected == 3, "Wrong number of features selected"
Пример #2
0
def test_keep():
    """Run the selector on a DataFrame with named columns and a ``keep`` list.

    The selector is created with ``keep=[2, "x5"]`` and fitted on a frame
    whose labels mix ints and strings. The result must be a DataFrame whose
    column set equals the expected set of string labels, which includes
    ``"2"`` and ``"x5"``.
    """
    data, y = get_random_data()
    selector = FeatureSelector(verbose=0, keep=[2, "x5"])
    # Labels are a mix of ints and strings.
    column_names = [1, 2, "3", "x4", "x5", "eng6", "eng7"]
    frame = pd.DataFrame(data, columns=column_names)
    result = selector.fit_transform(frame, y)
    assert isinstance(result, pd.DataFrame)
    # NOTE(review): expected labels are all strings although some inputs were
    # ints - presumably the selector stringifies column names; confirm.
    expected = {"1", "eng6", "eng7", "2", "x5"}
    assert set(result.columns) == expected, "Wrong features selected (%r)" % result.columns
Пример #3
0
def test_regular_df_X_y():
    """Run the selector with features and target both given as DataFrames.

    Neither frame gets explicit column names. The result must be a
    DataFrame whose column set is ``{0, 5, 6}``.
    """
    data, y = get_random_data()
    selector = FeatureSelector(verbose=0)
    features_df = pd.DataFrame(data)
    target_df = pd.DataFrame(y)
    result = selector.fit_transform(features_df, target_df)
    assert isinstance(result, pd.DataFrame)
    expected = {0, 5, 6}
    assert set(result.columns) == expected, "Wrong features selected (%r)" % result.columns
Пример #4
0
def test_regular_X_y():
    """Run the selector with default settings on plain numpy input.

    The result must be an ``np.ndarray`` with exactly 3 columns.
    """
    data, y = get_random_data()
    result = FeatureSelector(verbose=0).fit_transform(data, y)
    assert isinstance(result, np.ndarray)
    n_selected = result.shape[1]
    assert n_selected == 3, "Wrong number of features selected"
Пример #5
0
def test_nans():
    """Check how the selector deals with NaNs in fit and in transform.

    Two NaNs are written into rows 998 and 999 of the data. Fitting on the
    full data must raise ``ValueError``. After fitting on the first 900 rows
    (which do not contain those entries), transforming the full data must
    return a frame with columns ``{0, 5, 6}`` and exactly one NaN, located
    in column 0 at row 998.
    """
    data, y = get_random_data()
    data[998, 0] = np.nan
    data[999, 1] = np.nan
    selector = FeatureSelector(verbose=0)

    # Fitting on data that contains NaNs has to be rejected.
    rejected = False
    try:
        selector.fit(pd.DataFrame(data), y)
    except ValueError:
        rejected = True
    if not rejected:
        raise AssertionError("fit with NaNs should throw an error")

    # Fit on the NaN-free head, then transform everything including the NaN rows.
    selector.fit_transform(pd.DataFrame(data[:900]), y[:900])
    transformed = selector.transform(pd.DataFrame(data))

    first_feature_value = transformed[0].iloc[998]
    assert pd.isna(first_feature_value), "The first feature should be NaN"
    nan_count = np.sum(pd.isna(transformed).to_numpy(dtype=int))
    assert nan_count == 1, "only 1 place should be NaN"
    expected = {0, 5, 6}
    assert set(transformed.columns) == expected, "Wrong features selected (%r)" % transformed.columns