Пример #1
0
def test_predictors():
    """Check the return types of ``pps.predictors`` on the Titanic sample.

    Reads ``examples/titanic.csv``, keeps only the ``Age`` column and the
    ``Survived`` target, and verifies both supported output formats.
    """
    target = "Survived"
    titanic = pd.read_csv("examples/titanic.csv")[["Age", target]]

    # Default output: a DataFrame whose index does not contain the target.
    scores = pps.predictors(titanic, target)
    assert isinstance(scores, pd.DataFrame)
    assert target not in scores.index

    # output="list": a list whose entries are dicts (first entry checked).
    records = pps.predictors(titanic, target, output="list")
    assert isinstance(records, list)
    assert isinstance(records[0], dict)
Пример #2
0
def pred_power(clean_df, pred_column):
    """Rank the predictors of ``pred_column`` by ``model_score``.

    Runs ``pps.predictors`` with 4-fold cross-validation and a fixed random
    seed, then orders the result by the ``model_score`` column, ascending.

    Args:
        clean_df: DataFrame handed to ``pps.predictors`` (presumably already
            cleaned by the caller -- verify against the call site).
        pred_column: Name of the target column, passed as ``y``.

    Returns:
        A 2-tuple ``(ranked_df, first_five_x)``: the score table sorted by
        ``model_score`` ascending, and an array holding the ``x`` values of
        its first five rows.
    """
    pps_df = pps.predictors(clean_df,
                            y=pred_column,
                            cross_validation=4,
                            random_seed=123)

    # Sort once and reuse the result for both return values; the original
    # performed the identical sort twice.
    ranked = pps_df.sort_values(by="model_score", axis=0, ascending=True)

    # NOTE(review): ascending order puts the lowest model_score first.
    # Whether "lowest" means "best" depends on the metric ppscore chose for
    # the target -- confirm this is the intended ranking.
    return ranked, ranked["x"].iloc[:5].values
Пример #3
0
def test_predictors():
    """Exercise input validation and return types of ``pps.predictors``.

    Uses the ``Age`` and ``Survived`` columns of ``examples/titanic.csv``
    plus a small random frame whose columns are relabelled so that two of
    them share the same name.
    """
    target = "Survived"
    titanic = pd.read_csv("examples/titanic.csv")[["Age", target]]

    # Three random columns, then relabelled so that two names collide.
    clashing_df = pd.DataFrame(
        {
            "x1": np.random.uniform(-2, 2, 10),
            "x2": np.random.uniform(-2, 2, 10),
            "unique_column_name": np.random.uniform(-2, 2, 10),
        }
    )
    clashing_df.columns = [
        "duplicate_column_name",
        "duplicate_column_name",
        "unique_column_name",
    ]

    # --- input validation -------------------------------------------------
    not_a_dataframe = np.random.randn(10, 10)
    with pytest.raises(TypeError):
        pps.predictors(not_a_dataframe, target)

    # Each entry is (positional args, keyword args) expected to be rejected
    # with a ValueError: unknown target, unknown output, invalid `sorted`.
    rejected_calls = (
        ((titanic, "y_column_that_does_not_exist"), {}),
        ((titanic, target), {"output": "invalid_output_type"}),
        ((titanic, target), {"sorted": "invalid_value_for_sorted"}),
    )
    for call_args, call_kwargs in rejected_calls:
        with pytest.raises(ValueError):
            pps.predictors(*call_args, **call_kwargs)

    # Duplicate column names in the frame must be refused.
    with pytest.raises(AssertionError):
        pps.predictors(clashing_df, "duplicate_column_name")

    # --- return types -----------------------------------------------------
    scores = pps.predictors(titanic, target)
    assert isinstance(scores, pd.DataFrame)
    assert target not in scores.index

    records = pps.predictors(titanic, target, output="list")
    assert isinstance(records, list)
    assert isinstance(records[0], dict)
Пример #4
0
 def get_predictors(self) -> DataFrame:
     """Return the predictor scores, recomputing them when a target is set.

     With ``self.target`` set, ``pps.predictors`` is run on ``self.data``
     and the ``ppscore`` column, indexed by ``x``, is stored in
     ``self.predictors``. With no target, whatever ``self.predictors``
     currently holds is returned as-is.
     """
     if self.target is None:
         return self.predictors

     scores = pps.predictors(self.data, y=self.target)
     self.predictors = scores[['x', 'ppscore']].set_index(['x'])
     return self.predictors
import pandas as pd
import seaborn as sns
import ppscore as pps
from pandas import ExcelFile

# Location of the training workbook.
traindata = 'D://Download//traindata.xlsx'

# Load the workbook from the path defined above.
df = pd.read_excel(traindata)

# Score every column of the frame as a predictor of the "label" column.
predictors_df = pps.predictors(df, y="label")

# Bar chart of ppscore per predictor, with the x tick labels rotated by 90
# degrees.
plot = sns.barplot(data=predictors_df, x="x", y="ppscore")
tick_labels = plot.get_xticklabels()
plot.set_xticklabels(tick_labels, rotation=90)