Пример #1
0
def TrainStacking(df_train1, df_train2, models):
    """Fit base models on one split and a linear stacker on their predictions.

    Every base model is fitted on ``df_train1`` and then asked to predict
    ``df_train2``.  Those predictions, one column per base model, are the
    features on which a ``LinearRegression`` meta-model is fitted against
    ``df_train2[target]``.  ``target`` is read from module scope.

    Args:
        df_train1: Frame used to fit every base model.
        df_train2: Frame whose base-model predictions train the stacker.
        models: Iterable of ``(dt, mt, m, C)`` tuples.  ``dt`` is the data
            type: ``'nume'`` casts the selected columns to float, any other
            value sends them through ``dp.CateEncoderTwoMat``.  ``mt`` is
            the model type: ``'classification'`` replaces the target with
            ``1`` where it is ``> 0`` and ``0`` elsewhere.  ``m`` is an
            estimator exposing ``fit`` and ``predict``.  ``C`` is the
            column selection passed to the frames.

    Returns:
        Tuple ``(models, model_stack)``: the ``models`` argument (each
        estimator has had ``fit`` called on it) and the fitted
        ``LinearRegression`` stacker.

    Raises:
        ValueError: If ``models`` yields no base model, since there would
            be nothing to stack.
    """
    y_stack = df_train2[target].values.astype(float)

    # One (n_rows, 1) prediction column per base model; concatenated once
    # after the loop instead of re-copying a growing matrix each iteration.
    stack_columns = []
    for dt, mt, m, C in models:
        if dt == 'nume':
            X_train1 = df_train1[C].values.astype(float)
            X_train2 = df_train2[C].values.astype(float)
        else:
            X_train1 = df_train1[C].values
            X_train2 = df_train2[C].values
            # Both matrices are encoded in one call.
            # NOTE(review): presumably so they share one category mapping;
            # confirm against dp.CateEncoderTwoMat.
            X_train1, X_train2 = dp.CateEncoderTwoMat(X_train1, X_train2)

        y_train = df_train1[target].values.astype(float)
        if mt == 'classification':
            # Classifiers learn only the sign of the target.
            y_train = np.where(y_train > 0, 1, 0)
        m.fit(X_train1, y_train)

        p_stack = m.predict(X_train2)
        stack_columns.append(np.reshape(p_stack, (len(p_stack), 1)))

    if not stack_columns:
        raise ValueError('TrainStacking needs at least one base model')

    X_stack = np.hstack(stack_columns)

    model_stack = LinearRegression()
    model_stack.fit(X_stack, y_stack)

    return models, model_stack
Пример #2
0
def TrainAndValidationClassification(df_train,
                                     df_val,
                                     model,
                                     columns,
                                     data_type='nume'):
    """Fit a classifier on the training frame and report its accuracy.

    The label column is ``'logerror_sign'``.  Progress messages and the
    accuracies for both frames are written to ``sys.stdout``.

    Args:
        df_train: Frame used to fit ``model``.
        df_val: Frame used only for evaluation.
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        columns: Column selection used as features in both frames.
        data_type: ``'nume'`` casts the features to float; any other value
            sends both feature matrices through ``dp.CateEncoderTwoMat``.

    Returns:
        Tuple ``(y_train, y_val, p_train, p_val)``: the float label arrays
        and the model's predictions for the training and validation frames.
    """
    sys.stdout.write('Extract data ... ')
    sys.stdout.flush()

    if data_type == 'nume':
        X_train = df_train[columns].values.astype(float)
        X_val = df_val[columns].values.astype(float)
    else:
        X_train = df_train[columns].values
        X_val = df_val[columns].values
        # Both matrices are encoded in one call.
        # NOTE(review): presumably so they share one category mapping;
        # confirm against dp.CateEncoderTwoMat.
        X_train, X_val = dp.CateEncoderTwoMat(X_train, X_val)

    target = 'logerror_sign'
    y_train = df_train[target].values.astype(float)
    y_val = df_val[target].values.astype(float)

    sys.stdout.write('done.\n')

    sys.stdout.write('Train ... ')
    sys.stdout.flush()

    model.fit(X_train, y_train)

    sys.stdout.write('done.\n')

    sys.stdout.write('Predict ...')
    sys.stdout.flush()

    p_train = model.predict(X_train)
    p_val = model.predict(X_val)

    sys.stdout.write('done.\n')

    # Accuracy is the share of predictions exactly equal to the label.
    acc_train = (p_train == y_train).sum() / float(len(y_train))
    acc_val = (p_val == y_val).sum() / float(len(y_val))

    # Written through sys.stdout rather than a print statement so the
    # function parses on both Python 2 and Python 3.
    sys.stdout.write('Accuracy in train : %s\n' % (acc_train,))
    sys.stdout.write('Accuracy in validation : %s\n' % (acc_val,))

    return y_train, y_val, p_train, p_val
Пример #3
0
def TrainAndValidation(df_train, df_val, model, columns, data_type='nume'):
    """Fit a regressor on the training frame and report its MAE.

    The label column name is read from the module-level ``target``.
    Progress messages and the mean absolute error for both frames are
    written to ``sys.stdout``.

    Args:
        df_train: Frame used to fit ``model``.
        df_val: Frame used only for evaluation.
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        columns: Column selection used as features in both frames.
        data_type: ``'nume'`` casts the features to float; any other value
            sends both feature matrices through ``dp.CateEncoderTwoMat``.

    Returns:
        Tuple ``(y_train, y_val, p_train, p_val)``: the float label arrays
        and the model's predictions for the training and validation frames.
    """
    sys.stdout.write('Extract data ... ')
    sys.stdout.flush()

    if data_type == 'nume':
        X_train = df_train[columns].values.astype(float)
        X_val = df_val[columns].values.astype(float)
    else:
        X_train = df_train[columns].values
        X_val = df_val[columns].values
        # Both matrices are encoded in one call.
        # NOTE(review): presumably so they share one category mapping;
        # confirm against dp.CateEncoderTwoMat.
        X_train, X_val = dp.CateEncoderTwoMat(X_train, X_val)

    y_train = df_train[target].values.astype(float)
    y_val = df_val[target].values.astype(float)

    sys.stdout.write('done.\n')

    sys.stdout.write('Train ... ')
    sys.stdout.flush()

    model.fit(X_train, y_train)

    sys.stdout.write('done.\n')

    sys.stdout.write('Predict ...')
    sys.stdout.flush()

    p_train = model.predict(X_train)
    p_val = model.predict(X_val)

    sys.stdout.write('done.\n')

    mae_train = mean_absolute_error(y_train, p_train)
    mae_val = mean_absolute_error(y_val, p_val)

    # Written through sys.stdout rather than a print statement so the
    # function parses on both Python 2 and Python 3.
    sys.stdout.write('MAE in train : %s\n' % (mae_train,))
    sys.stdout.write('MAE in validation : %s\n' % (mae_val,))

    return y_train, y_val, p_train, p_val