def initialize_model():
    '''
    Calls on the StatusModel class, processes training data and trains model.

    The two training CSVs are read first, so a missing file is reported
    before any model is built. The trained model is also handed to
    dump_to_pickle with the path pickle/model.pkl.

    Returns:
        StatusModel trained with 2012-2014 3-year loan data.

    Raises:
        OSError, IOError: when a training CSV cannot be read; the download
            hint is printed and the original error is re-raised.
    '''
    training_paths = (
        'data/LoanStats3c_securev1.csv',
        'data/LoanStats3b_securev1.csv',
    )

    try:
        # header=1 takes the column names from the second line of the file.
        # That is the row the former header=True selected (True == 1) on
        # pandas versions that accepted a bool; current pandas rejects a
        # bool outright.
        # .iloc[:-2, :] drops the last two rows of each file -- presumably
        # non-data footer lines; confirm against the downloaded files.
        frames = [
            pd.read_csv(path, header=1).iloc[:-2, :]
            for path in training_paths
        ]
    except (OSError, IOError):
        # A single-argument print(...) behaves the same under the Python 2
        # print statement and the Python 3 print function.
        print("Training data not found. Please install from "
              "https://www.lendingclub.com/info/download-data.action")
        # Re-raise: carrying on would only fail a few lines later with an
        # unrelated UnboundLocalError on the frames that were never loaded.
        raise

    df_train = pd.concat(frames, axis=0)
    df_train = process_features(df_train, restrict_date=True,
                                current_loans=True)

    model = StatusModel(model=RandomForestRegressor,
                        parameters={'n_estimators': 100, 'max_depth': 10})
    model.train_model(df_train)
    dump_to_pickle(model, 'pickle/model.pkl')
    return model
def initialize_model():
    '''
    Calls on the StatusModel class, processes training data and trains model.

    The two training CSVs are read first, so a missing file is reported
    before any model is built. The trained model is also handed to
    dump_to_pickle with the path pickle/model.pkl.

    Returns:
        StatusModel trained with 2012-2014 3-year loan data.

    Raises:
        OSError, IOError: when a training CSV cannot be read; the download
            hint is printed and the original error is re-raised.
    '''
    training_paths = (
        'data/LoanStats3c_securev1.csv',
        'data/LoanStats3b_securev1.csv',
    )

    try:
        # header=1 takes the column names from the second line of the file.
        # That is the row the former header=True selected (True == 1) on
        # pandas versions that accepted a bool; current pandas rejects a
        # bool outright.
        # .iloc[:-2, :] drops the last two rows of each file -- presumably
        # non-data footer lines; confirm against the downloaded files.
        frames = [
            pd.read_csv(path, header=1).iloc[:-2, :]
            for path in training_paths
        ]
    except (OSError, IOError):
        # A single-argument print(...) behaves the same under the Python 2
        # print statement and the Python 3 print function.
        print("Training data not found. Please install from "
              "https://www.lendingclub.com/info/download-data.action")
        # Re-raise: carrying on would only fail a few lines later with an
        # unrelated UnboundLocalError on the frames that were never loaded.
        raise

    df_train = pd.concat(frames, axis=0)
    df_train = process_features(df_train, restrict_date=True,
                                current_loans=True)

    model = StatusModel(model=RandomForestRegressor,
                        parameters={'n_estimators': 100, 'max_depth': 10})
    model.train_model(df_train)
    dump_to_pickle(model, 'pickle/model.pkl')
    return model
df = process_features(df_raw) print "Initializing model..." model = StatusModel(model=RandomForestRegressor, parameters={ 'n_estimators': 100, 'max_depth': 10 }) print "Training model..." try: model = load_from_pickle('pickle/model.pkl') except OSError, IOError: print "Model not found. Training model, this might take some time..." model.train_model(df) dump_to_pickle(model, 'pickle/model.pkl') print "Calculating IRR..." int_rate_dict = { 'A1': 0.0603, 'A2': 0.0649, 'A3': 0.0699, 'A4': 0.0749, 'A5': 0.0819, 'B1': 0.0867, 'B2': 0.0949, 'B3': 0.1049, 'B4': 0.1144, 'B5': 0.1199, 'C1': 0.1239, 'C2': 0.1299,
df_raw = pd.concat((df_3c, df_3b), axis=0) print "Pre-processing data..." df = process_features(df_raw) print "Initializing model..." model = StatusModel(model=RandomForestRegressor, parameters={"n_estimators": 100, "max_depth": 10}) print "Training model..." try: model = load_from_pickle("pickle/model.pkl") except OSError, IOError: print "Model not found. Training model, this might take some time..." model.train_model(df) dump_to_pickle(model, "pickle/model.pkl") print "Calculating IRR..." int_rate_dict = { "A1": 0.0603, "A2": 0.0649, "A3": 0.0699, "A4": 0.0749, "A5": 0.0819, "B1": 0.0867, "B2": 0.0949, "B3": 0.1049, "B4": 0.1144, "B5": 0.1199, "C1": 0.1239, "C2": 0.1299,