#%% #load data data_name = 'all_loans_proc' LD = pd.read_csv(data_dir + data_name, parse_dates=['issue_d',]) #%% '''Store info for each predictor as a named tuple containing the col-name within the pandas dataframe, the full_name (human readable), and the type of normalization to apply to that feature.''' predictor = namedtuple('predictor', ['col_name', 'full_name', 'norm_type']) transformer_map = {'minMax':MinMaxScaler(), 'maxAbs':MaxAbsScaler(), 'standScal':StandardScaler(), 'log_minmax': LCM.log_minmax(), 'robScal':RobustScaler()} predictors = [ predictor('acc_now_delinq','num delinq accounts','maxAbs'), predictor('annual_inc','annual income','log_minmax'), predictor('collections_12_mths_ex_med','num recent collections','maxAbs'), predictor('cr_line_dur', 'duration cred line','standScal'), predictor('delinq_2yrs', 'num recent delinq','maxAbs'), predictor('desc_length', 'loan desc length','maxAbs'), predictor('dti', 'debt-income ratio','standScal'), predictor('emp_length', 'employment length','maxAbs'), # predictor('funded_amnt','loan amount','maxAbs'), predictor('loan_amnt','loan amount','maxAbs'), predictor('inq_last_6mths', 'num recent inqs','maxAbs'), predictor('int_rate', 'interest rate','maxAbs'),