Python process_features示例，helpers.preprocessing.process_features Python示例

示例#1

0

显示文件

def initialize_model():
    '''
    Builds a StatusModel, trains it on the LendingClub CSV exports and
    pickles the trained model to 'pickle/model.pkl'.

    The two CSV files under data/ are read, concatenated row-wise, passed
    through process_features (restrict_date=True, current_loans=True) and
    then handed to StatusModel.train_model.

    Returns:
    StatusModel trained with 2012-2014 3-year loan data. statusmodel class.

    Raises:
    OSError / IOError if the training CSV files cannot be read. A download
    hint is printed before the error is re-raised.
    '''
    model = StatusModel(model=RandomForestRegressor,
                        parameters={
                            'n_estimators': 100,
                            'max_depth': 10
                        })

    try:
        # header=1 (not True): read_csv expects a row index. Older pandas
        # silently coerced True to 1; newer versions reject a bool outright.
        # NOTE(review): presumably row 0 of the export is a notice line and
        # the last two rows are footer lines, hence iloc[:-2] -- confirm
        # against the actual files.
        df_3c = pd.read_csv('data/LoanStats3c_securev1.csv',
                            header=1).iloc[:-2, :]
        df_3b = pd.read_csv('data/LoanStats3b_securev1.csv',
                            header=1).iloc[:-2, :]
    except (OSError, IOError):
        print("Training data not found. Please install from https://www.lendingclub.com/info/download-data.action")
        # Without the data nothing below can run; propagate the real error
        # instead of failing later with a NameError on df_3c / df_3b.
        raise

    df_train = pd.concat((df_3c, df_3b), axis=0)
    df_train = process_features(df_train,
                                restrict_date=True,
                                current_loans=True)

    model.train_model(df_train)
    dump_to_pickle(model, 'pickle/model.pkl')

    return model

示例#2

0

显示文件

文件： start.py 项目： DeanCope/rateflask

def initialize_model():
    '''
    Builds a StatusModel, trains it on the LendingClub CSV exports and
    pickles the trained model to 'pickle/model.pkl'.

    The two CSV files under data/ are read, concatenated row-wise, passed
    through process_features (restrict_date=True, current_loans=True) and
    then handed to StatusModel.train_model.

    Returns:
    StatusModel trained with 2012-2014 3-year loan data. statusmodel class.

    Raises:
    OSError / IOError if the training CSV files cannot be read. A download
    hint is printed before the error is re-raised.
    '''
    model = StatusModel(model=RandomForestRegressor,
                        parameters={'n_estimators':100,
                                     'max_depth':10})

    try:
        # header=1 (not True): read_csv expects a row index. Older pandas
        # silently coerced True to 1; newer versions reject a bool outright.
        # NOTE(review): presumably row 0 of the export is a notice line and
        # the last two rows are footer lines, hence iloc[:-2] -- confirm
        # against the actual files.
        df_3c = pd.read_csv('data/LoanStats3c_securev1.csv', header=1).iloc[:-2, :]
        df_3b = pd.read_csv('data/LoanStats3b_securev1.csv', header=1).iloc[:-2, :]
    except (OSError, IOError):
        print("Training data not found. Please install from https://www.lendingclub.com/info/download-data.action")
        # Without the data nothing below can run; propagate the real error
        # instead of failing later with a NameError on df_3c / df_3b.
        raise

    df_train = pd.concat((df_3c, df_3b), axis=0)
    df_train = process_features(df_train, restrict_date=True, current_loans=True)

    model.train_model(df_train)
    dump_to_pickle(model, 'pickle/model.pkl')

    return model

示例#3

0

显示文件

文件： test.py 项目： nhu2000/rateflask


def test_expected_current():
    '''
    Loads the LendingClub CSV exports, pre-processes them and obtains a
    trained StatusModel, either from 'pickle/model.pkl' or by training a
    fresh one and pickling it.

    Progress messages are printed at each stage. The portion of the
    function visible here does not return a value.

    Raises:
    OSError / IOError if the training CSV files cannot be read. A download
    hint is printed before the error is re-raised.
    '''
    print("Loading data...")
    try:
        # header=1 (not True): read_csv expects a row index. Older pandas
        # silently coerced True to 1; newer versions reject a bool outright.
        # NOTE(review): presumably row 0 of the export is a notice line and
        # the last two rows are footer lines, hence iloc[:-2] -- confirm.
        df_3c = pd.read_csv('data/LoanStats3c_securev1.csv',
                            header=1).iloc[:-2, :]
        df_3b = pd.read_csv('data/LoanStats3b_securev1.csv',
                            header=1).iloc[:-2, :]
    # The tuple is required: "except OSError, IOError" would catch only
    # OSError and rebind the name IOError to the caught instance.
    except (OSError, IOError):
        print("Training data not found. Please install from https://www.lendingclub.com/info/download-data.action")
        # Nothing below can run without the data; surface the real error
        # rather than a NameError on df_3c / df_3b.
        raise

    df_raw = pd.concat((df_3c, df_3b), axis=0)

    print("Pre-processing data...")
    df = process_features(df_raw)

    print("Initializing model...")
    model = StatusModel(model=RandomForestRegressor,
                        parameters={
                            'n_estimators': 100,
                            'max_depth': 10
                        })

    print("Training model...")
    try:
        # Prefer a previously pickled model; on failure `model` still
        # refers to the untrained StatusModel created above.
        model = load_from_pickle('pickle/model.pkl')
    except (OSError, IOError):
        print("Model not found. Training model, this might take some time...")
        model.train_model(df)
        dump_to_pickle(model, 'pickle/model.pkl')

示例#4

0

显示文件

文件： test.py 项目： nhu2000/rateflask

from model.start import initialize_model
from model.validate import actual_IRR


def test_expected_current():
    print "Loading data..."
    try:
        df_3c = pd.read_csv("data/LoanStats3c_securev1.csv", header=True).iloc[:-2, :]
        df_3b = pd.read_csv("data/LoanStats3b_securev1.csv", header=True).iloc[:-2, :]
    except OSError, IOError:
        print "Training data not found. Please install from https://www.lendingclub.com/info/download-data.action"

    df_raw = pd.concat((df_3c, df_3b), axis=0)

    print "Pre-processing data..."
    df = process_features(df_raw)

    print "Initializing model..."
    model = StatusModel(model=RandomForestRegressor, parameters={"n_estimators": 100, "max_depth": 10})

    print "Training model..."
    try:
        model = load_from_pickle("pickle/model.pkl")
    except OSError, IOError:
        print "Model not found. Training model, this might take some time..."
        model.train_model(df)
        dump_to_pickle(model, "pickle/model.pkl")

    print "Calculating IRR..."
    int_rate_dict = {
        "A1": 0.0603,

示例#5

0

显示文件

    print "Inserting results of API request to database..."
    try:
        insert_into_mongodb(loan_results, loan_details)
    except Exception:
        print "MongoDB error, proceeding to next step..."

    print "Loading model..."
    try:
        model = load_from_pickle('pickle/model.pkl')
    except OSError, IOError:
        print "Model not found. Initializing training process, this might take some time..."
        model = initialize_model()

    print "Pre-processing data..."
    df_raw = process_requests(loan_results, loan_details)
    df = process_features(df_raw, restrict_date=False, features_dict=model.features_dict)

    print "Calculating results for display..."
    df_display = process_for_display(model, df, loan_results)

    print "Inserting processed data to database..."
    try:
        insert_into_postgresql(df_display)
    except Exception:
        print "PostgreSQL error, proceeding to next step..."

    print "Generating data for charts..."
    df_max = df_display.groupby('sub_grade').max()['IRR']
    generate_for_charts(df_max)

    print "Reformatting for display..."