def test_predictors():
    """Check the return types of ``pps.predictors`` on the Titanic sample data."""
    target = "Survived"
    titanic = pd.read_csv("examples/titanic.csv")[["Age", target]]

    # Default output is expected to be a DataFrame whose index does not
    # contain the target column name.
    as_frame = pps.predictors(titanic, target)
    assert isinstance(as_frame, pd.DataFrame)
    assert target not in as_frame.index

    # output="list" is expected to yield a list of dicts.
    as_records = pps.predictors(titanic, target, output="list")
    assert isinstance(as_records, list)
    assert isinstance(as_records[0], dict)
def pred_power(clean_df, pred_column):
    """Score every column of *clean_df* as a predictor of *pred_column*.

    Args:
        clean_df: DataFrame passed straight to ``pps.predictors``.
        pred_column: Name of the target column (forwarded as ``y``).

    Returns:
        A 2-tuple ``(sorted_scores, first_five_x)`` where ``sorted_scores``
        is the ``pps.predictors`` result ordered by ``model_score``
        ascending, and ``first_five_x`` is an array with the ``x`` values of
        its first five rows.
    """
    pps_df = pps.predictors(
        clean_df, y=pred_column, cross_validation=4, random_seed=123
    )
    # Sort once and reuse the result for both return values.
    # NOTE(review): ascending order puts the lowest model_score first --
    # confirm that this is the intended ranking for the "top five".
    ranked = pps_df.sort_values(by="model_score", axis=0, ascending=True)
    return ranked, ranked["x"][:5].values
def test_predictors():
    """Check input validation and return types of ``pps.predictors``."""
    y = "Survived"
    df = pd.read_csv("examples/titanic.csv")
    df = df[["Age", y]]

    # Fixture: a frame in which two columns share the same name.
    duplicate_column_names_df = pd.DataFrame(
        {
            name: np.random.uniform(-2, 2, 10)
            for name in ("x1", "x2", "unique_column_name")
        }
    )
    duplicate_column_names_df.columns = [
        "duplicate_column_name",
        "duplicate_column_name",
        "unique_column_name",
    ]

    # Fixture: something that is not a DataFrame. Built outside the
    # ``pytest.raises`` block so that only the call under test can satisfy
    # the expected exception.
    numpy_array = np.random.randn(10, 10)

    # check input types
    with pytest.raises(TypeError):
        pps.predictors(numpy_array, y)

    with pytest.raises(ValueError):
        pps.predictors(df, "y_column_that_does_not_exist")

    with pytest.raises(ValueError):
        pps.predictors(df, y, output="invalid_output_type")

    with pytest.raises(ValueError):
        pps.predictors(df, y, sorted="invalid_value_for_sorted")

    with pytest.raises(AssertionError):
        # df shall not have duplicate column names
        pps.predictors(duplicate_column_names_df, "duplicate_column_name")

    # check return types
    result_df = pps.predictors(df, y)
    assert isinstance(result_df, pd.DataFrame)
    assert y not in result_df.index

    list_of_dicts = pps.predictors(df, y, output="list")
    assert isinstance(list_of_dicts, list)
    assert isinstance(list_of_dicts[0], dict)
def get_predictors(self) -> DataFrame:
    """Return the ``ppscore`` of each column against ``self.target``.

    When ``self.target`` is set, ``self.predictors`` is recomputed from
    ``self.data`` as a frame indexed by ``x`` with a single ``ppscore``
    column. When it is ``None``, the current ``self.predictors`` is
    returned untouched.

    NOTE(review): the ``None`` branch relies on ``self.predictors`` having
    been assigned elsewhere -- confirm against the class initialiser.
    """
    if self.target is None:
        return self.predictors

    scores = pps.predictors(self.data, y=self.target)
    self.predictors = scores[['x', 'ppscore']].set_index(['x'])
    return self.predictors
import pandas as pd
import seaborn as sns
import ppscore as pps
from pandas import ExcelFile

# Location of the training workbook; defined once and reused below.
traindata = 'D://Download//traindata.xlsx'

# Load the workbook and score every column as a predictor of "label".
df = pd.read_excel(traindata)
predictors_df = pps.predictors(df, y="label")

# Bar chart of ppscore per predictor column; the x tick labels are rotated
# 90 degrees.
plot = sns.barplot(data=predictors_df, x="x", y="ppscore")
plot.set_xticklabels(plot.get_xticklabels(), rotation=90)