示例#1
0
def test_feature_importance_errors_non_dataframe():
    """Expect ``ValueError`` when ``feature_importance`` gets a NumPy array.

    The array is passed positionally as the data argument, so no DataFrame
    needs to be built for this check.
    """
    not_a_dataframe = np.random.random(100)
    # NOTE(review): compound_col="incorrect" is not a column name used anywhere
    # in these tests, so the ValueError could presumably also come from a
    # column lookup rather than the input-type check -- confirm against
    # feature_importance's implementation.
    with pytest.raises(ValueError):
        feature_selection.feature_importance(
            not_a_dataframe, "pos", "neg", compound_col="incorrect"
        )
示例#2
0
def test_feature_importance_returns_all_feature_columns():
    """The result should hold one entry per feature column (10 here)."""
    n_features = 10
    # Only the feature matrix is needed; the generated labels are discarded.
    features, _ = make_classification(
        n_samples=100, n_features=n_features, n_informative=2
    )
    frame = pd.DataFrame(
        features,
        columns=["x{}".format(idx) for idx in range(1, n_features + 1)],
    )
    # Alternate the two compound labels across the 100 rows.
    frame["Metadata_compound"] = ["pos", "neg"] * 50
    result = feature_selection.feature_importance(
        data=frame,
        neg_cmpd="neg",
        pos_cmpd="pos",
        compound_col="Metadata_compound",
    )
    assert len(list(result)) == n_features
示例#3
0
def test_feature_importance_returns_colnames():
    """Names in the result should equal ``utils.get_featuredata`` of the input."""
    n_features = 10
    # Only the feature matrix is needed; the generated labels are discarded.
    features, _ = make_classification(
        n_samples=100, n_features=n_features, n_informative=2
    )
    frame = pd.DataFrame(
        features,
        columns=["x{}".format(idx) for idx in range(1, n_features + 1)],
    )
    frame["Metadata_compound"] = ["pos", "neg"] * 50
    result = feature_selection.feature_importance(
        data=frame,
        neg_cmpd="neg",
        pos_cmpd="pos",
        compound_col="Metadata_compound",
    )
    # Each result item unpacks into two parts; only the first (name) is checked.
    returned_names, _ = zip(*result)
    expected_names = utils.get_featuredata(frame)
    assert list(returned_names) == list(expected_names)
def test_feature_importance_sort():
    """With ``sort=True`` the importances should come back in descending order."""
    n_features = 10
    # Only the feature matrix is needed; the generated labels are discarded.
    features, _ = make_classification(
        n_samples=100, n_features=n_features, n_informative=2
    )
    frame = pd.DataFrame(
        features,
        columns=["x{}".format(idx) for idx in range(1, n_features + 1)],
    )
    frame["Metadata_compound"] = ["pos", "neg"] * 50
    result = feature_selection.feature_importance(
        data=frame,
        neg_cmpd="neg",
        pos_cmpd="pos",
        compound_col="Metadata_compound",
        sort=True,
    )
    # Each result item unpacks into two parts; only the second is checked.
    _, importances = zip(*result)
    returned_order = list(importances)
    # A descending re-sort must leave the returned order unchanged.
    assert sorted(returned_order, reverse=True) == returned_order
示例#5
0
def test_feature_importance_sort():
    """Check that ``sort=True`` yields importances ordered largest first."""
    column_names = ["x{}".format(number) for number in range(1, 11)]
    # The labels from make_classification are not used by this test.
    matrix, _ = make_classification(n_samples=100, n_features=10, n_informative=2)
    table = pd.DataFrame(matrix, columns=column_names)
    table["Metadata_compound"] = ["pos", "neg"] * 50
    call_kwargs = {
        "data": table,
        "neg_cmpd": "neg",
        "pos_cmpd": "pos",
        "compound_col": "Metadata_compound",
        "sort": True,
    }
    ranked = feature_selection.feature_importance(**call_kwargs)
    # Split the two-part result items; the first part is not needed here.
    _, scores = zip(*ranked)
    scores = list(scores)
    descending = sorted(scores, reverse=True)
    assert descending == scores