示例#1
0
def initialize_model():
    '''
    Loads the training data, trains a StatusModel on it and pickles the result.

    Reads the two LoanStats CSV files under data/, concatenates them, passes
    them through process_features and trains a StatusModel wrapping
    RandomForestRegressor. The trained model is dumped to pickle/model.pkl
    before it is returned.

    Returns:
    StatusModel trained with 2012-2014 3-year loan data. statusmodel class.

    Raises:
    OSError or IOError if a training CSV cannot be read. A download hint is
    printed first, then the original error is re-raised.
    '''
    # Load the data before building the model so a missing file fails fast.
    try:
        # header=1 takes the column names from the second line of the file.
        # The previous header=True relied on older pandas treating the bool
        # as the integer 1; current pandas rejects a bool here.
        # .iloc[:-2, :] drops the last two rows -- presumably footer lines in
        # the export; TODO confirm against the raw files.
        df_3c = pd.read_csv('data/LoanStats3c_securev1.csv',
                            header=1).iloc[:-2, :]
        df_3b = pd.read_csv('data/LoanStats3b_securev1.csv',
                            header=1).iloc[:-2, :]
    except (OSError, IOError):
        print("Training data not found. Please install from https://www.lendingclub.com/info/download-data.action")
        # Without the data there is nothing to train on. Re-raise so the
        # caller sees the real I/O error rather than an unbound-variable
        # error from the concat below.
        raise

    model = StatusModel(model=RandomForestRegressor,
                        parameters={
                            'n_estimators': 100,
                            'max_depth': 10
                        })

    df_train = pd.concat((df_3c, df_3b), axis=0)
    df_train = process_features(df_train,
                                restrict_date=True,
                                current_loans=True)

    model.train_model(df_train)
    dump_to_pickle(model, 'pickle/model.pkl')

    return model
示例#2
0
def initialize_model():
    '''
    Calls on the StatusModel class, processes training data and trains model.

    The two LoanStats CSV files under data/ are read and concatenated, run
    through process_features, and used to train a StatusModel wrapping
    RandomForestRegressor. The trained model is dumped to pickle/model.pkl.

    Returns:
    StatusModel trained with 2012-2014 3-year loan data. statusmodel class.

    Raises:
    OSError or IOError if a training CSV cannot be read. A download hint is
    printed first, then the original error is re-raised.
    '''
    csv_paths = ('data/LoanStats3c_securev1.csv',
                 'data/LoanStats3b_securev1.csv')

    # Read the data first: if it is missing there is no point building a model.
    try:
        # header=1 reads the column names from the second line of each file.
        # header=True only worked on older pandas (bool treated as 1) and is
        # rejected by current versions.
        # .iloc[:-2, :] drops the last two rows of each file -- presumably
        # footer lines; TODO confirm against the raw exports.
        frames = [pd.read_csv(path, header=1).iloc[:-2, :]
                  for path in csv_paths]
    except (OSError, IOError):
        print("Training data not found. Please install from https://www.lendingclub.com/info/download-data.action")
        # Propagate the real error; falling through would only fail later
        # on variables that were never assigned.
        raise

    model = StatusModel(model=RandomForestRegressor,
                        parameters={'n_estimators': 100,
                                    'max_depth': 10})

    df_train = pd.concat(frames, axis=0)
    df_train = process_features(df_train, restrict_date=True, current_loans=True)

    model.train_model(df_train)
    dump_to_pickle(model, 'pickle/model.pkl')

    return model
示例#3
0
文件: test.py 项目: nhu2000/rateflask

def test_expected_current():
    '''
    Loads the loan data and obtains a trained StatusModel.

    Reads the two LoanStats CSV files under data/, pre-processes them with
    process_features, then tries to load a model from pickle/model.pkl. If
    the pickle cannot be read, a fresh StatusModel is trained on the
    processed data and dumped to that path.

    Raises:
    OSError or IOError if a training CSV cannot be read. A download hint is
    printed first, then the original error is re-raised.
    '''
    print("Loading data...")
    try:
        # header=1 takes the column names from the second line of the file.
        # header=True relied on older pandas treating the bool as 1 and is
        # rejected by current pandas.
        # .iloc[:-2, :] drops the last two rows -- presumably footer lines;
        # TODO confirm against the raw exports.
        df_3c = pd.read_csv('data/LoanStats3c_securev1.csv',
                            header=1).iloc[:-2, :]
        df_3b = pd.read_csv('data/LoanStats3b_securev1.csv',
                            header=1).iloc[:-2, :]
    # The exceptions must be a tuple: "except OSError, IOError" catches only
    # OSError and binds it to the name IOError.
    except (OSError, IOError):
        print("Training data not found. Please install from https://www.lendingclub.com/info/download-data.action")
        # Nothing below can run without the data, so surface the real error.
        raise

    df_raw = pd.concat((df_3c, df_3b), axis=0)

    print("Pre-processing data...")
    df = process_features(df_raw)

    print("Initializing model...")
    model = StatusModel(model=RandomForestRegressor,
                        parameters={
                            'n_estimators': 100,
                            'max_depth': 10
                        })

    print("Training model...")
    try:
        # Prefer a previously trained model; it replaces the untrained one.
        model = load_from_pickle('pickle/model.pkl')
    # Tuple form so a missing pickle (IOError on Python 2) really does
    # trigger the training fallback.
    except (OSError, IOError):
        print("Model not found. Training model, this might take some time...")
        model.train_model(df)
        dump_to_pickle(model, 'pickle/model.pkl')
示例#4
0
文件: test.py 项目: nhu2000/rateflask
from model.start import initialize_model
from model.validate import actual_IRR


def test_expected_current():
    print "Loading data..."
    try:
        df_3c = pd.read_csv("data/LoanStats3c_securev1.csv", header=True).iloc[:-2, :]
        df_3b = pd.read_csv("data/LoanStats3b_securev1.csv", header=True).iloc[:-2, :]
    except OSError, IOError:
        print "Training data not found. Please install from https://www.lendingclub.com/info/download-data.action"

    df_raw = pd.concat((df_3c, df_3b), axis=0)

    print "Pre-processing data..."
    df = process_features(df_raw)

    print "Initializing model..."
    model = StatusModel(model=RandomForestRegressor, parameters={"n_estimators": 100, "max_depth": 10})

    print "Training model..."
    try:
        model = load_from_pickle("pickle/model.pkl")
    except OSError, IOError:
        print "Model not found. Training model, this might take some time..."
        model.train_model(df)
        dump_to_pickle(model, "pickle/model.pkl")

    print "Calculating IRR..."
    int_rate_dict = {
        "A1": 0.0603,
示例#5
0
    print "Inserting results of API request to database..."
    try:
        insert_into_mongodb(loan_results, loan_details)
    except Exception:
        print "MongoDB error, proceeding to next step..."

    print "Loading model..."
    try:
        model = load_from_pickle('pickle/model.pkl')
    except OSError, IOError:
        print "Model not found. Initializing training process, this might take some time..."
        model = initialize_model()

    print "Pre-processing data..."
    df_raw = process_requests(loan_results, loan_details)
    df = process_features(df_raw, restrict_date=False, features_dict=model.features_dict)

    print "Calculating results for display..."
    df_display = process_for_display(model, df, loan_results)

    print "Inserting processed data to database..."
    try:
        insert_into_postgresql(df_display)
    except Exception:
        print "PostgreSQL error, proceeding to next step..."

    print "Generating data for charts..."
    df_max = df_display.groupby('sub_grade').max()['IRR']
    generate_for_charts(df_max)

    print "Reformatting for display..."