def process(data_in, config_features='config'):
    """Train WOE rules on a CSV file, apply them, and export IV statistics.

    Every input and output path is resolved against the current working
    directory.

    Parameters
    ----------
    data_in : str
        Base name (without the ``.csv`` extension) of the input data file.
    config_features : str, default 'config'
        Base name (without the ``.csv`` extension) of the feature config file.

    Returns
    -------
    feature_detail
        First element of the pair returned by ``fp.process_train_woe``.
        It is grouped by ``var_name`` and aggregated on ``iv`` here, so it
        is presumably a pandas DataFrame with those columns.

    Notes
    -----
    The following paths are handed to ``fp`` as output locations:
    ``<data_in>_stats_wide.csv``, ``woe_rule.pkl`` and ``<data_in>_out.csv``.
    This function itself writes ``<data_in>_stats.xlsx`` with two sheets:
    ``Short`` (max ``iv`` per variable, descending) and ``Full``.
    """
    cwd = os.getcwd()
    config_path = os.path.join(cwd, config_features + '.csv')
    data_path = os.path.join(cwd, data_in + '.csv')
    feature_detail_path = os.path.join(cwd, data_in + '_stats_wide.csv')
    rst_pkl_path = os.path.join(cwd, 'woe_rule.pkl')

    # Train the WOE rule; the second return value is not needed here.
    feature_detail, _ = fp.process_train_woe(
        infile_path=data_path,
        outfile_path=feature_detail_path,
        rst_path=rst_pkl_path,
        config_path=config_path)

    # Apply the trained WOE transformation to the same data set.
    woe_train_path = os.path.join(cwd, data_in + '_out.csv')
    fp.process_woe_trans(data_path, rst_pkl_path, woe_train_path, config_path)

    # Short statistics: the highest IV per variable, best variables first.
    # The string 'max' is used instead of the builtin to avoid the pandas
    # FutureWarning about builtin callables in agg().
    stats = (feature_detail.groupby('var_name')
             .agg({'iv': 'max'})
             .sort_values(['iv'], ascending=False))

    # The context manager saves and closes the workbook even if a sheet
    # fails to write; ExcelWriter.save() no longer exists in pandas >= 2.0.
    with pd.ExcelWriter(data_in + '_stats.xlsx') as writer:
        stats.to_excel(writer, sheet_name='Short')
        feature_detail.to_excel(writer, sheet_name='Full')

    return feature_detail
def transform(self, X, y=None):
    """Replace the features in X with their WOE-encoded counterparts.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        The input samples. NOTE(review): the body calls ``X.copy()`` and
        assigns a ``'target'`` column, so a pandas DataFrame is presumably
        required in practice -- confirm against callers.
    y : array-like, shape = [n_samples]
        The target values. Not read by this method.

    Returns
    -------
    The object returned by ``fp.process_woe_trans`` with its ``target``
    column dropped.

    Raises
    ------
    ValueError
        If ``self.config_filepath`` or ``self.load_woe_pickle_filepath``
        is None (checked in that order).
    """
    config_file = self.config_filepath
    if config_file is None:
        raise ValueError("Config filepath does not exist."
                         " Please define a config file first.")

    woe_pickle = self.load_woe_pickle_filepath
    if woe_pickle is None:
        raise ValueError("Load WOE pickle filepath does not exist."
                         " Either fit model or load model first.")

    # Work on a copy so the caller's X is left untouched. A placeholder
    # target column is added before the call and removed again afterwards
    # (presumably process_woe_trans requires one -- TODO confirm).
    frame = X.copy()
    frame['target'] = -1

    # NOTE: anything process_woe_trans prints to stdout is not suppressed.
    woe_frame = fp.process_woe_trans(frame, woe_pickle, config_file)
    return woe_frame.drop('target', axis=1)
import woe.GridSearch as gs

if __name__ == '__main__':
    # Example pipeline: train WOE rules, WOE-transform the data, then collect
    # the parameters for scorecard model training.
    # All files live in the current working directory. os.path.join is used
    # instead of a hard-coded '\\' so the paths are also valid off Windows.
    cwd = os.getcwd()
    config_path = os.path.join(cwd, 'config.csv')
    data_path = os.path.join(cwd, 'UCI_Credit_Card.csv')
    feature_detail_path = os.path.join(cwd, 'features_detail.csv')
    rst_pkl_path = os.path.join(cwd, 'woe_rule.pkl')

    # Train the WOE rule.
    feature_detail, rst = fp.process_train_woe(
        infile_path=data_path,
        outfile_path=feature_detail_path,
        rst_path=rst_pkl_path,
        config_path=config_path)

    # Apply the WOE transformation to the training data.
    woe_train_path = os.path.join(cwd, 'dataset_train_woed.csv')
    fp.process_woe_trans(data_path, rst_pkl_path, woe_train_path, config_path)

    # The same data set is reused as the test data set.
    woe_test_path = os.path.join(cwd, 'dataset_test_woed.csv')
    fp.process_woe_trans(data_path, rst_pkl_path, woe_test_path, config_path)

    print('###TRAIN SCORECARD MODEL###')
    # Parameters for model training: input data sets, the config file, and
    # the output locations/titles for the coefficient table and plots.
    params = {}
    params['dataset_path'] = woe_train_path
    params['validation_path'] = woe_test_path
    params['config_path'] = config_path

    params['df_coef_path'] = os.path.join(cwd, 'df_model_coef_path.csv')
    params['pic_coefpath'] = os.path.join(cwd, 'model_coefpath.png')
    params['pic_performance'] = os.path.join(cwd, 'model_performance_path.png')
    params['pic_coefpath_title'] = 'model_coefpath'
    params['pic_performance_title'] = 'model_performance_path'