def test_feature_importance_errors_non_dataframe():
    """feature_importance should raise ValueError for a non-DataFrame input."""
    # A bare NumPy array is passed as `data`; no DataFrame is needed for
    # this check, so none is constructed.
    not_a_dataframe = np.random.random(100)
    with pytest.raises(ValueError):
        feature_selection.feature_importance(
            not_a_dataframe, "pos", "neg", compound_col="incorrect"
        )
def test_feature_importance_returns_all_feature_columns():
    """feature_importance should yield one entry per feature column."""
    n_features = 10
    features, _ = make_classification(
        n_samples=100, n_features=n_features, n_informative=2
    )
    column_names = ["x" + str(i) for i in range(1, n_features + 1)]
    data = pd.DataFrame(features, columns=column_names)
    # Alternate the two compound labels across the 100 rows.
    data["Metadata_compound"] = ["pos", "neg"] * 50
    result = feature_selection.feature_importance(
        data=data,
        neg_cmpd="neg",
        pos_cmpd="pos",
        compound_col="Metadata_compound",
    )
    # The metadata column must not be counted among the returned entries.
    assert len(list(result)) == n_features
def test_feature_importance_returns_colnames():
    """Names returned by feature_importance should match utils.get_featuredata."""
    features, _ = make_classification(
        n_samples=100, n_features=10, n_informative=2
    )
    data = pd.DataFrame(features)
    data.columns = ["x" + str(i) for i in range(1, 11)]
    data["Metadata_compound"] = ["pos", "neg"] * 50
    result = feature_selection.feature_importance(
        data=data,
        neg_cmpd="neg",
        pos_cmpd="pos",
        compound_col="Metadata_compound",
    )
    # Each returned item is a (name, importance) pair; split them apart and
    # keep only the names for comparison.
    returned_names, _ = zip(*result)
    expected_names = utils.get_featuredata(data)
    assert list(returned_names) == list(expected_names)
def test_feature_importance_sort():
    """With sort=True, importances should be returned in descending order."""
    x, _ = make_classification(n_samples=100, n_features=10, n_informative=2)
    x = pd.DataFrame(x)
    x.columns = ["x" + str(i) for i in range(1, 11)]
    x["Metadata_compound"] = ["pos", "neg"] * 50
    out = feature_selection.feature_importance(
        data=x,
        neg_cmpd="neg",
        pos_cmpd="pos",
        compound_col="Metadata_compound",
        sort=True,
    )
    # Each item of `out` is a (name, importance) pair; only the importances
    # matter for the ordering check.
    _, importances = zip(*out)
    assert sorted(importances, reverse=True) == list(importances)