def initialize_model():
    '''
    Builds a StatusModel, trains it on the local loan data and pickles it.

    Reads data/LoanStats3c_securev1.csv and data/LoanStats3b_securev1.csv,
    drops the last two rows of each file, concatenates them, runs
    process_features(restrict_date=True, current_loans=True) on the result
    and trains a StatusModel wrapping RandomForestRegressor
    (n_estimators=100, max_depth=10). The trained model is written to
    pickle/model.pkl.

    Returns:
    StatusModel trained with 2012-2014 3-year loan data. statusmodel class.

    Raises:
    OSError / IOError if a training file cannot be read. The download hint
    is printed before the error is re-raised.
    '''
    model = StatusModel(model=RandomForestRegressor,
                        parameters={'n_estimators': 100, 'max_depth': 10})

    try:
        # header=1 is the integer form of the original header=True
        # (True == 1); recent pandas rejects a bool here with a TypeError.
        # NOTE(review): presumably row 0 of the file is a notes line and
        # row 1 holds the column names -- confirm against the data files.
        df_3c = pd.read_csv('data/LoanStats3c_securev1.csv', header=1).iloc[:-2, :]
        df_3b = pd.read_csv('data/LoanStats3b_securev1.csv', header=1).iloc[:-2, :]
    except (OSError, IOError):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Training data not found. Please install from https://www.lendingclub.com/info/download-data.action")
        # Continuing would only fail below on the unbound df_3c / df_3b,
        # so surface the real cause instead.
        raise

    df_train = pd.concat((df_3c, df_3b), axis=0)
    df_train = process_features(df_train, restrict_date=True, current_loans=True)

    model.train_model(df_train)
    dump_to_pickle(model, 'pickle/model.pkl')

    return model
def initialize_model():
    '''
    Calls on the StatusModel class, processes training data and trains model.

    The two training CSV files under data/ are read (last two rows of each
    dropped), concatenated and passed through
    process_features(restrict_date=True, current_loans=True). A StatusModel
    wrapping RandomForestRegressor (n_estimators=100, max_depth=10) is then
    trained on the result and saved to pickle/model.pkl.

    Returns:
    StatusModel trained with 2012-2014 3-year loan data. statusmodel class.

    Raises:
    OSError / IOError when a training file cannot be read; the download
    hint is printed first and the original error is re-raised.
    '''
    model = StatusModel(model=RandomForestRegressor,
                        parameters={'n_estimators': 100, 'max_depth': 10})

    try:
        # header=1 replaces header=True: True == 1, so older pandas read the
        # same row, while recent pandas raises TypeError for a bool header.
        # NOTE(review): assumes the column names sit on the second row of
        # the file -- confirm against the data files.
        df_3c = pd.read_csv('data/LoanStats3c_securev1.csv', header=1).iloc[:-2, :]
        df_3b = pd.read_csv('data/LoanStats3b_securev1.csv', header=1).iloc[:-2, :]
    except (OSError, IOError):
        # print(...) with one argument is valid on both Python 2 and 3.
        print("Training data not found. Please install from https://www.lendingclub.com/info/download-data.action")
        # Without the data there is nothing to train on; falling through
        # would hit unbound df_3c / df_3b, so re-raise the real error.
        raise

    df_train = pd.concat((df_3c, df_3b), axis=0)
    df_train = process_features(df_train, restrict_date=True, current_loans=True)

    model.train_model(df_train)
    dump_to_pickle(model, 'pickle/model.pkl')

    return model
def test_expected_current():
    '''
    Loads the training data, pre-processes it and obtains a trained model.

    The model is loaded from pickle/model.pkl when possible. If loading
    fails with OSError / IOError, a fresh StatusModel wrapping
    RandomForestRegressor (n_estimators=100, max_depth=10) is trained on
    the processed data and written to pickle/model.pkl.

    Raises:
    OSError / IOError if a training file cannot be read. The download hint
    is printed before the error is re-raised.
    '''
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Loading data...")
    try:
        # header=1 is the integer form of the original header=True
        # (True == 1); recent pandas rejects a bool here with a TypeError.
        df_3c = pd.read_csv('data/LoanStats3c_securev1.csv', header=1).iloc[:-2, :]
        df_3b = pd.read_csv('data/LoanStats3b_securev1.csv', header=1).iloc[:-2, :]
    except (OSError, IOError):
        # The original "except OSError, IOError" caught only OSError on
        # Python 2 (binding it to the name IOError); a tuple catches both.
        print("Training data not found. Please install from https://www.lendingclub.com/info/download-data.action")
        # Continuing would only fail below on the unbound df_3c / df_3b.
        raise

    df_raw = pd.concat((df_3c, df_3b), axis=0)

    print("Pre-processing data...")
    df = process_features(df_raw)

    print("Initializing model...")
    model = StatusModel(model=RandomForestRegressor,
                        parameters={'n_estimators': 100, 'max_depth': 10})

    print("Training model...")
    try:
        model = load_from_pickle('pickle/model.pkl')
    except (OSError, IOError):
        # No usable pickle: train the freshly constructed model and cache it.
        print("Model not found. Training model, this might take some time...")
        model.train_model(df)
        dump_to_pickle(model, 'pickle/model.pkl')
from model.start import initialize_model from model.validate import actual_IRR def test_expected_current(): print "Loading data..." try: df_3c = pd.read_csv("data/LoanStats3c_securev1.csv", header=True).iloc[:-2, :] df_3b = pd.read_csv("data/LoanStats3b_securev1.csv", header=True).iloc[:-2, :] except OSError, IOError: print "Training data not found. Please install from https://www.lendingclub.com/info/download-data.action" df_raw = pd.concat((df_3c, df_3b), axis=0) print "Pre-processing data..." df = process_features(df_raw) print "Initializing model..." model = StatusModel(model=RandomForestRegressor, parameters={"n_estimators": 100, "max_depth": 10}) print "Training model..." try: model = load_from_pickle("pickle/model.pkl") except OSError, IOError: print "Model not found. Training model, this might take some time..." model.train_model(df) dump_to_pickle(model, "pickle/model.pkl") print "Calculating IRR..." int_rate_dict = { "A1": 0.0603,
# Store the raw API results. Best-effort: a database failure is reported
# and the remaining steps still run.
# Single-argument print(...) behaves the same on Python 2 and 3.
print("Inserting results of API request to database...")
try:
    insert_into_mongodb(loan_results, loan_details)
except Exception:
    print("MongoDB error, proceeding to next step...")

# Obtain a trained model: use the pickled one, otherwise train from scratch.
print("Loading model...")
try:
    model = load_from_pickle('pickle/model.pkl')
except (OSError, IOError):
    # The original "except OSError, IOError" caught only OSError on Python 2
    # and rebound the name IOError to the exception; a tuple catches both.
    print("Model not found. Initializing training process, this might take some time...")
    model = initialize_model()

# Turn the API results into model features, reusing the model's own
# features_dict.
print("Pre-processing data...")
df_raw = process_requests(loan_results, loan_details)
df = process_features(df_raw, restrict_date=False, features_dict=model.features_dict)

print("Calculating results for display...")
df_display = process_for_display(model, df, loan_results)

# Best-effort as well: a PostgreSQL failure does not stop chart generation.
print("Inserting processed data to database...")
try:
    insert_into_postgresql(df_display)
except Exception:
    print("PostgreSQL error, proceeding to next step...")

# Charts are driven by the maximum IRR within each sub_grade.
print("Generating data for charts...")
df_max = df_display.groupby('sub_grade').max()['IRR']
generate_for_charts(df_max)

print("Reformatting for display...")