"""Driver script that runs the project ``Pipeline`` on ``cs-training.csv``.

Stages run in this order: explore, add features, split, impute, export.
The behaviour of each stage lives in ``pipeline.Pipeline``; this script
only fixes the order of the calls and their arguments.
"""

import pandas as pd

import build  # not referenced in the visible part of the script; kept as-is
import explore  # required: explore.snake_columns is called below
from pipeline import Pipeline

PATH = 'cs-training.csv'
# The test CSV is read with its first column as the index, and the resulting
# frame is passed through explore.snake_columns before reaching the pipeline.
TEST = explore.snake_columns(pd.read_csv('cs-test.csv', index_col=0))

my_pipeline = Pipeline(PATH, TEST)

# explore
print('exploring')
my_pipeline.explore()

# add features
print('adding features')
# Currently disabled:
# my_pipeline.add_col('monthly_income')
my_pipeline.add_col('age', transform_type='squared')
my_pipeline.create_binary('number_of_dependents')
# NOTE(review): the split happens before imputation in this script --
# presumably 'serious_dlqin2yrs' is the target column; confirm in Pipeline.
my_pipeline.split('serious_dlqin2yrs')

# process
print('processing')
my_pipeline.impute('number_of_dependents', method='mode')
my_pipeline.impute('monthly_income', method='median')
my_pipeline.export_train_tocsv()

# build models
# Longer list of model keys kept for reference (presumably the full set the
# project supports -- confirm): ['KNN','RF','LR','ET','AB','GB','NB','DT']
models = ['KNN', 'DT', 'NB', 'LR']
# get X and y
print('building models')
# Driver script that runs the project Pipeline on 'cs-training.csv'.
#
# Stages run in this order: explore, impute, add features, split, export,
# build models. The behaviour of each stage lives in pipeline.Pipeline; this
# script only fixes the order of the calls and their arguments.

import pandas as pd

import build  # not referenced in the visible part of the script; kept as-is
import explore  # required: explore.snake_columns is called below
from pipeline import Pipeline

PATH = 'cs-training.csv'
# The test CSV is read with its first column as the index, and the resulting
# frame is passed through explore.snake_columns before reaching the pipeline.
TEST = explore.snake_columns(pd.read_csv('cs-test.csv', index_col=0))

my_pipeline = Pipeline(PATH, TEST)

# explore
print('exploring')
my_pipeline.explore()

# process
print('processing')
my_pipeline.impute('number_of_dependents', method='mode')
my_pipeline.impute('monthly_income', method='mean')

# add features
print('adding features')
my_pipeline.add_col('monthly_income')
my_pipeline.add_col('age', transform_type='squared')
my_pipeline.create_binary('number_of_dependents')
# NOTE(review): presumably 'serious_dlqin2yrs' is the target column the data
# is split on; confirm against Pipeline.split.
my_pipeline.split('serious_dlqin2yrs')
my_pipeline.export_train_tocsv()

# build models
print('building models')
my_pipeline.build('logistic')
my_pipeline.build('knn')
print(my_pipeline.models)