def test_sparse_matrix_inputs():
    """StackingCVRegressor should accept dense arrays and CSR matrices.

    The expected sparse-path MSE depends on the installed scikit-learn
    version (internal CV/solver changes shifted the rounded value).
    """
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear', gamma='auto')
    ridge = Ridge(random_state=1)
    svr_rbf = SVR(kernel='rbf', gamma='auto')
    stack = StackingCVRegressor(regressors=[svr_lin, lr, ridge],
                                meta_regressor=svr_rbf,
                                random_state=42)

    # dense
    stack.fit(X1, y).predict(X1)
    mse = 0.21
    got = np.mean((stack.predict(X1) - y)**2)
    assert round(got, 2) == mse, got

    # sparse
    stack.fit(sparse.csr_matrix(X1), y)

    # The original "< 0.21" and "< 0.22" branches both expected 0.20,
    # so they are collapsed into a single version check.
    if Version(sklearn_version) < Version("0.22"):
        expected_value = 0.20
    else:
        expected_value = 0.21

    got = np.mean((stack.predict(sparse.csr_matrix(X1)) - y)**2)
    assert round(got, 2) == expected_value, got
Пример #2
0
def test_sparse_matrix_inputs():
    """Dense and CSR-sparse inputs should both reach the reference MSE."""
    meta = SVR(kernel='rbf', gamma='auto')
    stack = StackingCVRegressor(
        regressors=[SVR(kernel='linear', gamma='auto'),
                    LinearRegression(),
                    Ridge(random_state=1)],
        meta_regressor=meta,
        random_state=42)

    # dense input
    stack.fit(X1, y).predict(X1)
    mse = 0.21
    got = np.mean((stack.predict(X1) - y) ** 2)
    assert round(got, 2) == mse, got

    # sparse input; the rounded MSE shifted with scikit-learn 0.21
    stack.fit(sparse.csr_matrix(X1), y)
    if Version(sklearn_version) < Version("0.21"):
        expected_value = 0.20
    else:
        expected_value = 0.19
    got = np.mean((stack.predict(sparse.csr_matrix(X1)) - y) ** 2)
    assert round(got, 2) == expected_value, got
Пример #3
0
def build_lasso():
    """Train a stacked house-price model and write the submission CSV.

    Loads train/test data, runs the feature-engineering pipeline on the
    combined frame, fits a StackingCVRegressor over four base models with
    a random forest as the meta-regressor, prints a 3-fold CV RMSE, and
    saves back-transformed (expm1) test predictions to disk.
    """
    train_df, test_df = load_data()
    # Process train and test together so encodings/normalization agree.
    combined_df = pd.concat((train_df.loc[:, 'MSSubClass':'SaleCondition'],
                             test_df.loc[:, 'MSSubClass':'SaleCondition']))

    # feature engineering
    config_categorical_features(combined_df)
    # combined_df = extract_common_features(combined_df)
    log_transform_features(combined_df)
    combined_df = normalize_numerical_features(combined_df)
    combined_df = one_hot_encoding(combined_df)
    missing_value_fill(combined_df)

    # Split the combined frame back into train/test rows.
    X_train = combined_df[:train_df.shape[0]]
    X_test = combined_df[train_df.shape[0]:]
    # log1p-transform the target; predictions are inverted with expm1 below.
    y = np.log1p(train_df["SalePrice"])

    # models
    lass_model = Lasso(alpha=0.0005, max_iter=1000)

    lgb_params = {
        'lambda_l2': 0,
        'learning_rate': 0.05,
        'min_child_samples': 4,
        'n_estimators': 500,
        'num_leaves': 10
    }
    lgb_model = lgb.LGBMRegressor(**lgb_params)

    xgb_params = {'max_depth': 2, 'n_estimators': 360}
    xgb_model = xgb.XGBRegressor(**xgb_params)

    rf_params = {
        'max_depth': 50,
        'max_features': None,
        'min_samples_leaf': 4,
        'n_estimators': 50
    }
    rf_model = RandomForestRegressor(**rf_params)

    # Fixed typo: 'mata_model' -> 'meta_model' (local name only).
    meta_model = rf_model
    model = StackingCVRegressor(regressors=(lass_model, lgb_model, xgb_model,
                                            rf_model),
                                meta_regressor=meta_model,
                                use_features_in_secondary=True)

    model.fit(np.array(X_train), np.array(y))
    print(
        "cross_validation_rmse:",
        np.mean(
            np.sqrt(-cross_val_score(model,
                                     np.array(X_train),
                                     np.array(y),
                                     cv=3,
                                     scoring="neg_mean_squared_error"))))

    # model prediction
    stack_preds = np.expm1(model.predict(np.array(X_test)))
    solution = pd.DataFrame({"id": test_df.Id, "SalePrice": stack_preds})
    solution.to_csv("./house_price/submission_stack_v1.csv", index=False)
def AlgoSCR(df_train, df_trainY, m1, m2, m3, m4, m5):
    """Stack the five base regressors (m1 doubles as the meta-regressor),
    report its CV score and training-set RMS, and return the fitted model."""
    stacked = StackingCVRegressor(regressors=(m1, m2, m3, m4, m5),
                                  meta_regressor=m1,
                                  use_features_in_secondary=True)
    rmsle_cv(stacked, df_train, df_trainY)
    stacked.fit(df_train, df_trainY)
    train_pred = stacked.predict(df_train)
    rms = sqrt(mean_squared_error(df_trainY, train_pred))
    print("rms value of same set: ", np.around(rms, decimals=7))
    return stacked
def test_predict_meta_features():
    """Predictions on held-out data must have one value per test sample."""
    meta = SVR(kernel='rbf')
    stregr = StackingCVRegressor(
        regressors=[LinearRegression(), Ridge(random_state=1)],
        meta_regressor=meta)
    X_train, X_test, y_train, y_test = train_test_split(X2, y, test_size=0.3)
    stregr.fit(X_train, y_train)
    preds = stregr.predict(X_test)
    assert preds.shape[0] == X_test.shape[0]
Пример #6
0
def test_sparse_matrix_inputs():
    """Fitting on a CSR matrix should match the dense-input reference MSE."""
    stack = StackingCVRegressor(
        regressors=[SVR(kernel='linear'),
                    LinearRegression(),
                    Ridge(random_state=1)],
        meta_regressor=SVR(kernel='rbf'))

    # dense input
    stack.fit(X1, y).predict(X1)
    dense_err = np.mean((stack.predict(X1) - y) ** 2)
    assert round(dense_err, 2) == 0.20

    # sparse input
    stack.fit(sparse.csr_matrix(X1), y)
    sparse_err = np.mean((stack.predict(sparse.csr_matrix(X1)) - y) ** 2)
    assert round(sparse_err, 2) == 0.20
def test_predict_meta_features():
    """The stack's predictions must align row-for-row with the test set."""
    base_models = [LinearRegression(), Ridge(random_state=1)]
    stregr = StackingCVRegressor(regressors=base_models,
                                 meta_regressor=SVR(kernel='rbf'))
    X_train, X_test, y_train, y_test = train_test_split(X2, y, test_size=0.3)
    stregr.fit(X_train, y_train)
    n_predicted = stregr.predict(X_test).shape[0]
    assert n_predicted == X_test.shape[0]
def test_different_models():
    """A stack of heterogeneous base models should hit the reference MSE."""
    base_models = [SVR(kernel='linear'),
                   LinearRegression(),
                   Ridge(random_state=1)]
    stack = StackingCVRegressor(regressors=base_models,
                                meta_regressor=SVR(kernel='rbf'))
    stack.fit(X1, y).predict(X1)
    err = np.mean((stack.predict(X1) - y) ** 2)
    assert round(err, 2) == 0.21
def test_different_models():
    """Mixing linear, ridge and SVR base models reaches the expected MSE."""
    meta = SVR(kernel='rbf')
    stack = StackingCVRegressor(
        regressors=[SVR(kernel='linear'),
                    LinearRegression(),
                    Ridge(random_state=1)],
        meta_regressor=meta)
    stack.fit(X1, y).predict(X1)
    expected = 0.21
    observed = np.mean((stack.predict(X1) - y) ** 2)
    assert round(observed, 2) == expected
def test_multivariate():
    """Stacking on the multivariate feature set X2 reaches the reference MSE."""
    base_models = [SVR(kernel='linear'),
                   LinearRegression(),
                   Ridge(random_state=1)]
    stack = StackingCVRegressor(regressors=base_models,
                                meta_regressor=SVR(kernel='rbf'))
    stack.fit(X2, y).predict(X2)
    mse = 0.19
    got = np.mean((stack.predict(X2) - y) ** 2)
    assert round(got, 2) == mse, '%f != %f' % (round(got, 2), mse)
def test_multivariate():
    """The multivariate input X2 should produce the known reference MSE."""
    meta = SVR(kernel='rbf')
    stack = StackingCVRegressor(
        regressors=[SVR(kernel='linear'),
                    LinearRegression(),
                    Ridge(random_state=1)],
        meta_regressor=meta)
    stack.fit(X2, y).predict(X2)
    mse = 0.19
    got = np.mean((stack.predict(X2) - y) ** 2)
    assert round(got, 2) == mse, '%f != %f' % (round(got, 2), mse)
Пример #12
0
def test_multivariate_class():
    # Multi-output regression: y2 has multiple target columns, enabled via
    # multi_output=True on the stack.
    lr = LinearRegression()
    ridge = Ridge(random_state=1)
    # NOTE(review): LinearRegression(normalize=True) was deprecated in
    # scikit-learn 1.0 and removed in 1.2 — this test presumably targets an
    # older sklearn release; confirm the pinned version.
    meta = LinearRegression(normalize=True)
    stregr = StackingCVRegressor(regressors=[lr, ridge],
                                 meta_regressor=meta,
                                 multi_output=True,
                                 random_state=0)
    stregr.fit(X2, y2).predict(X2)
    # Reference mean squared error over all outputs.
    mse = 0.13
    got = np.mean((stregr.predict(X2) - y2) ** 2.)
    assert round(got, 2) == mse, got
Пример #13
0
def test_sparse_matrix_inputs_with_features_in_secondary():
    """use_features_in_secondary should work for dense and CSR inputs alike."""
    base_models = [SVR(kernel='linear', gamma='auto'),
                   LinearRegression(),
                   Ridge(random_state=1)]
    stack = StackingCVRegressor(regressors=base_models,
                                meta_regressor=SVR(kernel='rbf', gamma='auto'),
                                random_state=42,
                                use_features_in_secondary=True)

    # dense input
    stack.fit(X1, y).predict(X1)
    mse = 0.20
    got = np.mean((stack.predict(X1) - y) ** 2)
    assert round(got, 2) == mse, got

    # sparse input
    stack.fit(sparse.csr_matrix(X1), y)
    got = np.mean((stack.predict(sparse.csr_matrix(X1)) - y) ** 2)
    assert round(got, 2) == mse, got
def test_sparse_matrix_inputs_with_features_in_secondary():
    """Passing original features to the meta-regressor must support sparse X."""
    meta = SVR(kernel='rbf', gamma='auto')
    stack = StackingCVRegressor(
        regressors=[SVR(kernel='linear', gamma='auto'),
                    LinearRegression(),
                    Ridge(random_state=1)],
        meta_regressor=meta,
        random_state=42,
        use_features_in_secondary=True)

    # dense input
    stack.fit(X1, y).predict(X1)
    dense_err = np.mean((stack.predict(X1) - y) ** 2)
    assert round(dense_err, 2) == 0.20, dense_err

    # sparse input
    stack.fit(sparse.csr_matrix(X1), y)
    sparse_err = np.mean((stack.predict(sparse.csr_matrix(X1)) - y) ** 2)
    assert round(sparse_err, 2) == 0.20, sparse_err
def test_use_features_in_secondary():
    """Feeding original features to the meta-regressor hits the reference MSE."""
    base_models = [SVR(kernel='linear'),
                   LinearRegression(),
                   Ridge(random_state=1)]
    stack = StackingCVRegressor(regressors=base_models,
                                meta_regressor=SVR(kernel='rbf'),
                                cv=3,
                                use_features_in_secondary=True)
    stack.fit(X1, y).predict(X1)
    mse = 0.2
    got = np.mean((stack.predict(X1) - y) ** 2)
    assert round(got, 2) == mse, '%f != %f' % (round(got, 2), mse)
def test_use_features_in_secondary():
    """With use_features_in_secondary=True, training MSE matches the reference."""
    meta = SVR(kernel='rbf')
    stack = StackingCVRegressor(
        regressors=[SVR(kernel='linear'),
                    LinearRegression(),
                    Ridge(random_state=1)],
        meta_regressor=meta,
        cv=3,
        use_features_in_secondary=True)
    stack.fit(X1, y).predict(X1)
    mse = 0.2
    got = np.mean((stack.predict(X1) - y) ** 2)
    assert round(got, 2) == mse, '%f != %f' % (round(got, 2), mse)
def test_internals():
    """With identical linear base models and a linear meta-model, the stack
    degenerates: predictions equal the mean of y2 and the meta-coefficients
    are all zero."""
    lr = LinearRegression()
    regressors = [lr, lr, lr, lr, lr]
    cv = 10
    # Reuse `regressors` instead of repeating the literal list, keeping the
    # length assertion below in sync with what was actually fitted.
    stack = StackingCVRegressor(regressors=regressors,
                                meta_regressor=lr,
                                cv=cv)
    stack.fit(X3, y2)
    assert stack.predict(X3).mean() == y2.mean()
    assert stack.meta_regr_.intercept_ == 0.0
    assert stack.meta_regr_.coef_[0] == 0.0
    assert stack.meta_regr_.coef_[1] == 0.0
    assert stack.meta_regr_.coef_[2] == 0.0
    assert len(stack.regr_) == len(regressors)
def test_internals():
    """Sanity-check internal fitted state: identical base models plus a
    linear meta-model must yield mean predictions and zero coefficients."""
    lr = LinearRegression()
    regressors = [lr, lr, lr, lr, lr]
    cv = 10
    # Reuse `regressors` instead of duplicating the literal list so the
    # final length assertion cannot silently drift out of sync.
    stack = StackingCVRegressor(regressors=regressors,
                                meta_regressor=lr,
                                cv=cv)
    stack.fit(X3, y2)
    assert stack.predict(X3).mean() == y2.mean()
    assert stack.meta_regr_.intercept_ == 0.0
    assert stack.meta_regr_.coef_[0] == 0.0
    assert stack.meta_regr_.coef_[1] == 0.0
    assert stack.meta_regr_.coef_[2] == 0.0
    assert len(stack.regr_) == len(regressors)
Пример #19
0
def test_sample_weight():
    """sample_weight must be honored: weighted and unweighted fits differ."""
    base_models = [SVR(kernel='linear', gamma='auto'),
                   LinearRegression(),
                   Ridge(random_state=1)]
    stack = StackingCVRegressor(regressors=base_models,
                                meta_regressor=SVR(kernel='rbf', gamma='auto'),
                                cv=KFold(4, shuffle=True, random_state=7))
    pred1 = stack.fit(X1, y, sample_weight=w).predict(X1)
    mse = 0.21  # 0.20770
    got = np.mean((stack.predict(X1) - y) ** 2)
    assert round(got, 2) == mse, "Expected %.2f, but got %.5f" % (mse, got)
    # Refit without weights; the predictions must differ measurably.
    pred2 = stack.fit(X1, y).predict(X1)
    maxdiff = np.max(np.abs(pred1 - pred2))
    assert maxdiff > 1e-3, "max diff is %.4f" % maxdiff
def test_sample_weight():
    """Fitting with sample_weight should change predictions vs. no weights."""
    meta = SVR(kernel='rbf', gamma='auto')
    stack = StackingCVRegressor(
        regressors=[SVR(kernel='linear', gamma='auto'),
                    LinearRegression(),
                    Ridge(random_state=1)],
        meta_regressor=meta,
        cv=KFold(4, shuffle=True, random_state=7))
    pred1 = stack.fit(X1, y, sample_weight=w).predict(X1)
    mse = 0.21  # 0.20770
    got = np.mean((stack.predict(X1) - y) ** 2)
    assert round(got, 2) == mse, "Expected %.2f, but got %.5f" % (mse, got)
    # Unweighted refit for comparison.
    pred2 = stack.fit(X1, y).predict(X1)
    maxdiff = np.max(np.abs(pred1 - pred2))
    assert maxdiff > 1e-3, "max diff is %.4f" % maxdiff
Пример #21
0
class Model:
    """Two-headed estimator: a stacked Lasso/Ridge model for the target
    column plus a single (multi-output) Lasso for the constraint columns.

    Parameters default to module-level constants; `predict` returns a
    DataFrame with one column per constraint and one for the target.
    """

    def __init__(self,
                 lasso_target_params=LASSO_TARGET_PARAMS,
                 ridge_target_params=RIDGE_TARGET_PARAMS,
                 lasso_constraints_params=LASSO_CONSTRAINTS_PARAMS,
                 constraint_columns=CONSTRAINT_COLUMNS,
                 target_column=TARGET_COLUMN):

        # Stacked model predicting the single target column.
        self.target_estimator = StackingCVRegressor(
            regressors=(Lasso(**lasso_target_params),
                        Ridge(**ridge_target_params)),
            meta_regressor=Ridge(alpha=0.01))
        # One Lasso fit on all constraint columns jointly.
        self.constraints_estimator = Lasso(**lasso_constraints_params)

        self.constraint_columns = constraint_columns
        self.target_column = target_column

        # Column order captured at fit time; predict() re-selects it.
        self.test_columns = None

    def fit(self, data):
        '''
        Feed dataframe
        '''
        train, labels = generate_train_data(data)

        self.target_estimator.fit(train.values,
                                  labels[self.target_column].values)
        self.constraints_estimator.fit(train.values,
                                       labels[self.constraint_columns].values)

        self.test_columns = train.columns

    def predict(self, data):
        predicted_constraints = self.constraints_estimator.predict(
            data[self.test_columns].values)

        # One output column per constraint (transpose: columns of the
        # multi-output prediction matrix).
        predicted_data = {
            self.constraint_columns[i]: constraint
            for i, constraint in enumerate(predicted_constraints.T)
        }
        # Bug fix: use the configured self.target_column instead of the
        # module constant TARGET_COLUMN, so a custom target column passed
        # to __init__ is honored on prediction as well as on fit.
        predicted_data[self.target_column] = self.target_estimator.predict(
            data[self.test_columns].values)

        return pandas.DataFrame(predicted_data)
Пример #22
0
                               min_samples_split=25,
                               min_samples_leaf=35,
                               max_features=150)

# results = cross_val_score(et_model, train, y_train, cv=5, scoring='r2')
# print("ET score: %.4f (%.4f)" % (results.mean(), results.std()))

# Ensemble: stack five pre-built pipelines/models under an ElasticNet
# meta-regressor.
stack = StackingCVRegressor(  #meta_regressor=Ridge(alpha=10),
    meta_regressor=ElasticNet(l1_ratio=0.1, alpha=1.5),
    regressors=(svm_pipe, en_pipe, xgb_pipe, rf_model, lgbm_model))
#regressors=(svm_pipe, en_pipe, xgb_pipe, rf_model))

# cv_pred = cross_val_predict(stack, train, y_train, cv=5)
# print("R2 score: %.4f" % r2_score(y_train, cv_pred))
# exit()

## R2 score: 0.5600 (en_pipe, rf_model)
## R2 score: 0.5601 (svm_pipe, en_pipe, xgb_pipe, rf_model, et_model)
## R2 score: 0.5605 (svm_pipe, en_pipe, xgb_pipe, rf_model, et_model, lgbm_model)
## R2 score: 0.5618 (svm_pipe, en_pipe, xgb_pipe, rf_model, lgbm_model)

# Fit on the full training set and write the submission file.
stack.fit(train, y_train)

y_test = stack.predict(test)

df_sub = pd.DataFrame({'ID': id_test, 'y': y_test})
df_sub.to_csv('mercedes_submissions/ensemble.csv', index=False)

##
## https://www.kaggle.com/eaturner/stacking-em-up/output
##
Пример #23
0
    RobustScaler(), ElasticNet(max_iter=1e7, alpha=0.0004, l1_ratio=0.9))
score = rmsle_cv(elasticnet)
print(f"ElasticNet score: {score.mean():.4f} ({score.std():.4f})")

# ElasticNet predictions, back-transformed from log space with expm1.
e = elasticnet.fit(train, y_train)
predictions = elasticnet.predict(test)
EN = np.expm1(predictions)

# Stacked ensemble with LightGBM as the meta-regressor; the meta-model
# also sees the original features (use_features_in_secondary=True).
stack_gen = StackingCVRegressor(regressors=(ridge, lasso, elasticnet, lgb, gbr,
                                            svr),
                                meta_regressor=lgb,
                                use_features_in_secondary=True)
score = rmsle_cv(stack_gen)
print(f"Stack score: {score.mean():.4f} ({score.std():.4f})")
sg = stack_gen.fit(train, y_train)
predictions = stack_gen.predict(test)
STACK = np.expm1(predictions)

# Blend: replace out-of-range Lasso predictions with XGB's, otherwise take
# a fixed weighted average of all model predictions (weights sum to 1).
for i in range(LASSO.size):
    if LASSO[i] < 55000 or LASSO[i] > 500000:
        LASSO[i] = XGB[i]
    else:
        LASSO[i] = .1 * XGB[i] + .05 * LGB[i] + .05 * LASSO[i] + \
                   .2 * SVR[i] + .05 * GBR[i] + .25 * RIDGE[i] + .05 * EN[i] + .25 * STACK[i]

submission = pd.DataFrame()
submission['Id'] = test_ID
submission['SalePrice'] = LASSO
submission.head()
submission.to_csv(r'../submissions/{}_{}.csv'.format(
    os.path.basename(sys.argv[0])[:-3],
Пример #24
0
model3 = make_regressor(clf='lgb')

lr = LinearRegression()

# First ensemble: stack three base regressors under a linear meta-model.
# store_train_meta_features=True keeps out-of-fold predictions available
# as stack_one.train_meta_features_.
stack_one = StackingCVRegressor(regressors=[model1, model2, model3], 
                            meta_regressor=lr, 
                            cv=5,
                            verbose=0,
                            store_train_meta_features=True,
                            random_state=SEED)

# `groups` keeps samples from the same group within the same CV fold.
stack_one.fit(X, y, groups=groups)

## Return ordinal predictions
pr1 = stack_one.predict(X)
optR = OptimizedRounder()
coefs_one = get_coef(stack_one, X, y)
#print(coefs_one)
#one_preds = optR.predict(pr1.reshape(-1, ), coefs_one)
#print(np.round(qwk(y, one_preds), 3))
# lr.fit(stack_one.train_meta_features_, y)
# p1 = lr.predict(stack_one.predict_meta_features(X))



# Define models for the second ensemble (using group k-fold)
model4 = make_regressor(clf='ngb')

model5 = make_regressor(clf='cat')
Пример #25
0
# Fit each model on the full training set and print its training RMSLE.
print(datetime.now(), 'GBMR: ',end="")
GBMR.fit(X_train, y)
print(rmsle(y, GBMR.predict(X_train)))

print(datetime.now(), 'LGBMR: ',end="")
LGBMR.fit(X_train, y)
print(rmsle(y, LGBMR.predict(X_train)))

print(datetime.now(), 'XGBR: ',end="")
XGBR.fit(X_train, y)
print(rmsle(y, XGBR.predict(X_train)))

# The stacking model requires plain ndarrays rather than DataFrames.
print(datetime.now(), 'STACKING: ',end="")
STACKING.fit(np.array(X_train), np.array(y))
print(rmsle(y, STACKING.predict(np.array(X_train))))

#%%
print('''
################################################################################################
# FINISH
################################################################################################
''')

def Arithmetic_Blending(X):
    # Fixed-weight blend of ridge, LightGBM and the stacked model
    # (weights 0.1 / 0.35 / 0.55).
    return (
            (0.1 * RIDGE_MODEL.predict(np.array(X))) + \
            (0.35 * LGBMR.predict(X)) + \
            (0.55 * STACKING.predict(np.array(X)))#+ \
        )   
print(datetime.now(),'RMSLE score on train data:')
# Four base models sharing the same estimator count and thread pool.
rf = RandomForestRegressor(n_estimators=50,
                           max_depth=5,
                           random_state=2018,
                           n_jobs=8)
xgb = XGBRegressor(n_estimators=50,
                   learning_rate=0.75,
                   random_state=2018,
                   n_jobs=8)
lgb = LGBMRegressor(n_estimators=50,
                    learning_rate=0.75,
                    random_state=2018,
                    n_jobs=8)
svr = SVR(kernel='rbf', gamma='auto')
lr = LinearRegression(n_jobs=8)
models = [rf, xgb, lgb, svr]

# Hand-rolled stacking implementation for comparison with mlxtend's.
y_pred_self = StackingModels(models=models,
                             meta_model=lr,
                             X_train=X_train,
                             X_test=X_test,
                             y_train=y_train,
                             use_probas=False,
                             task_mode='reg')
mse = mean_squared_error(y_test, y_pred_self)
print('MyModel:  MSE = {:.6f}'.format(mse))

# mlxtend reference implementation with the same base/meta models.
stack_reg = StackingCVRegressor(regressors=models, meta_regressor=lr,
                                cv=5).fit(X_train, y_train)
y_pred_mxltend = stack_reg.predict(X_test)
mse = mean_squared_error(y_test, y_pred_mxltend)
print('Mlxtend:  MSE = {:.6f}'.format(mse))
Пример #27
0
                            random_state=666)

# Compare each individual model and the stack via 10-fold CV RMSE.
for clf, label in zip([lr, lasso, ridge, xgboost, catboost, gbm, stack], [
        'LR', 'Lasso', 'Ridge', 'XGBoost', 'CatBoost', 'LightGBM',
        'StackingCVRegressor'
]):
    scores = cross_val_score(clf,
                             X_train,
                             Y_train,
                             cv=10,
                             scoring='neg_root_mean_squared_error')
    print("Neg. RMSE Score: %0.3f (+/- %0.3f) [%s]" %
          (scores.mean(), scores.std(), label))

# Targets are log-transformed, so expm1 restores the original scale
# before computing the RMSLE on the hold-out split.
stack.fit(X_train, Y_train)
Y_pred = stack.predict(x_test)
print(
    'Accuracy Stacked Regressor (RMSLE):',
    np.sqrt(metrics.mean_squared_log_error(np.expm1(y_test),
                                           np.expm1(Y_pred))))

#####PREDICT TEST DATA AND EXPORT FOR UPLOAD
predict_export = pd.DataFrame()
predict_export['Id'] = test['Id']
prediction_catboost = cb_reg.predict(test.drop('Id', axis=1))
prediction_catboost = np.expm1(prediction_catboost)
predict_export['SalePrice'] = prediction_catboost
predict_export.to_csv('submission_cb.csv', index=False)

predict_export = pd.DataFrame()
predict_export['Id'] = test['Id']
Пример #28
0
    'gamma': 0,
    'reg_alpha': 0,
    'reg_lambda': 1
}

model = xgb.XGBRegressor(**other_params)
# Grid-search XGBoost hyperparameters by (negative) mean squared error.
mgb = GridSearchCV(estimator=model,
                   param_grid=cv_params,
                   scoring='neg_mean_squared_error',
                   cv=5,
                   verbose=1)
mgb.fit(train_X, train_Y)
print('参数的最佳取值:{0}'.format(mgb.best_params_))
print('最佳模型得分:{0}'.format(-mgb.best_score_))
myxgb = mgb.best_estimator_

##############################-- Model fusion --######################################
stack = StackingCVRegressor(regressors=[myGBR, myxgb],
                            meta_regressor=LinearRegression(),
                            use_features_in_secondary=True,
                            cv=5)

stack.fit(train_X, train_Y)
pred_Y = stack.predict(test_X)
print(mean_squared_error(test_Y, pred_Y))

# Predict the submission set and write plain values (no header/index).
Y_pred = stack.predict(test)
results = pd.DataFrame(Y_pred, columns=['target'])
results.to_csv("results.txt", index=False, header=False)
print("over")
Пример #29
0
#mgb = GridSearchCV(estimator=model, param_grid=cv_params, scoring='neg_mean_squared_error', cv=5, verbose=1)
#mgb.fit(train_X, train_Y)
#print('参数的最佳取值:{0}'.format(mgb.best_params_))
#print('最佳模型得分:{0}'.format(-mgb.best_score_))
#myxgb = mgb.best_estimator_

myxgb = xgb.XGBRegressor(**other_params)

###############################-- Model fusion --######################################
# Stack XGBoost, random forest and LightGBM under a Bayesian meta-model.
stack = StackingCVRegressor(regressors=[myxgb, myRFR, mylgb],
                            meta_regressor=bayes,
                            use_features_in_secondary=True,
                            cv=8)

stack.fit(train_X, train_Y)
pred_Y = stack.predict(test_X)
mse = mean_squared_error(test_Y, pred_Y)
print('mse: %.10f' % mse)
# 7-fold CV splitter (used by the commented-out evaluation loop below).
folds = KFold(n_splits=7, shuffle=True, random_state=2019)

#mean = []
#for fold, (i, j) in enumerate(folds.split(train_X1, train_Y1)):
#    print("fold {}".format(fold+1))
#    trn_X, trn_Y = train_X1[i], train_Y1[i]
#    tsn_X, tsn_Y = train_X1[j], train_Y1[j]
#
#    stack = stack
#    stack.fit(trn_X, trn_Y)
#    pred_Y = stack.predict(tsn_X)
#    mse = mean_squared_error(tsn_Y, pred_Y)
#    print('mse: %.10f' % mse)
Пример #30
0
                           max_depth=50)

# Stack five base models with XGBoost as the meta-regressor; the meta-model
# also receives the original features.
avg = StackingCVRegressor(regressors=(lightgbm, grb, svr, krr, rf),
                          meta_regressor=xgb,
                          use_features_in_secondary=True)


def rmsle(y, y_pred):
    """Root mean squared error; equals RMSLE when targets are log-scaled."""
    return np.sqrt(mean_squared_error(y, y_pred))


# Convert to plain ndarrays for the stacking model.
x = np.array(X)
Y = np.array(y)

avg.fit(x, Y)
y_pred = avg.predict(x)

rmsle(y, y_pred)
Predict = avg.predict(np.array(test_df.drop('Price', axis=1)))

# Converting price back to original scale and making it integer
Predict = np.exp(Predict)
Predict = Predict.astype(int)

#########################################################################3#
import lightgbm as lgb

model_lgb = lgb.LGBMRegressor(objective='regression',
                              num_leaves=5,
                              learning_rate=0.05,
                              n_estimators=720,
Пример #31
0
    score = rmsle_cv(model)
    end = time.time()
    print("{}: {:.6f}, {:.4f} in {:.3f} s".format(name, score.mean(),
                                                  score.std(), end - start))
# Base models; the last entry (BayesianRidge) serves as the meta-regressor.
sel_models = [
    Lasso(alpha=0.0005, random_state=1, max_iter=10000),
    ElasticNet(alpha=0.0005, l1_ratio=.9, random_state=3),
    KernelRidge(alpha=0.9, kernel='polynomial', degree=3, coef0=2.5),
    GradientBoostingRegressor(n_estimators=3000,
                              learning_rate=0.05,
                              max_depth=4,
                              max_features='sqrt',
                              min_samples_leaf=15,
                              min_samples_split=10,
                              loss='huber',
                              random_state=5),
    SVR(gamma=0.0004, kernel='rbf', C=13, epsilon=0.009),
    BayesianRidge(),
]
np.random.seed(42)
stack = StackingCVRegressor(regressors=sel_models[:5],
                            meta_regressor=sel_models[5])
start = time.time()
score = rmsle_cv(stack)
end = time.time()
print("Stacked : {:.6f}, (+/-) {:.4f} in {:.3f} s".format(
    score.mean(), score.std(), end - start))
# Train on the full set; np.exp inverts the log-transformed target.
stack.fit(train, target.values)
pred = np.exp(stack.predict(test))
result = pd.DataFrame({'Id': test_ID, 'SalePrice': pred})
result.to_csv("submission.csv", index=False)
Y_pred_linear = sc.inverse_transform(lr.predict(X_test))
#Evaluating
print("\n\nLinear Regression SCORE : ", score(Y_pred_linear, actual_cost))

'''

#--------------------------------------------------------------------------------

#Stacking Ensemble Regression
###########################################################################
#Importing and Initializing the Regressor
from mlxtend.regressor import StackingCVRegressor

#Initializing Level One Regressorsxgbr = XGBRegressor()
#rf = RandomForestRegressor(n_estimators=100, random_state=1)
#lr = LinearRegression()

#Stacking the various regressors initialized before
# xgbr doubles as base model and meta-regressor.
stack = StackingCVRegressor(regressors=(xgbr, rf),
                            meta_regressor=xgbr,
                            use_features_in_secondary=True)

#Fitting the data
stack.fit(X_train, Y_train)

#Predicting the Test set results
# sc.inverse_transform maps the scaled predictions back to original units.
y_pred_ense = sc.inverse_transform(stack.predict(X_test))

#Evaluating
print("\n\nStackingCVRegressor SCORE : ", score(y_pred_ense, actual_cost))
Пример #33
0
#meta regressor with the SalePrice being predictor values
stack_gen = StackingCVRegressor(regressors=(ridge, elasticnet, lasso, xgboost,
                                            lightgbm),
                                meta_regressor=xgboost,
                                use_features_in_secondary=True)

# The stack requires plain ndarrays rather than DataFrames.
stackX = np.array(X)
stacky = np.array(y)

#Fit the models
elasticnet.fit(X, y)
lasso.fit(X, y)
ridge.fit(X, y)
xgboost.fit(X, y)
lightgbm.fit(X, y)
stack_gen.fit(stackX, stacky)

#We take a weighted average of our predictions, what this does adds some variance at the expense of losing a little bias
stack_preds = ((0.2 * elasticnet.predict(test_data)) +
               (0.1 * lasso.predict(test_data)) +
               (0.1 * ridge.predict(test_data)) +
               (0.2 * xgboost.predict(test_data)) +
               (0.1 * lightgbm.predict(test_data)) +
               (0.3 * stack_gen.predict(test_data)))

#Create The Submission
# expm1 inverts the log1p-transformed target before writing the CSV.
sub = pd.DataFrame()
sub['Id'] = test_ids
sub['SalePrice'] = np.expm1(stack_preds)
sub.to_csv('submission.csv', index=False)
Пример #34
0
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import mean_squared_error
import numpy as np
import matplotlib.pyplot as plt

# Use the first 100 rows of the Boston housing data for a quick demo.
x, y = boston_housing_data()
x = x[:100]
y = y[:100]
# Split the dataset into train and test portions.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
# Initialize the base models.
lr = LinearRegression()
svr_lin = SVR(kernel='linear', gamma='auto')
ridge = Ridge(random_state=2019, )
lasso = Lasso()
models = [lr, svr_lin, ridge, lasso]

# 5-fold CV score of each base model on its own.
print("base model")
for model in models:
    score = cross_val_score(model, x_train, y_train, cv=5)
    print(score.mean(), "+/-", score.std())
# Stack all base models with Lasso as the meta-regressor.
sclf = StackingCVRegressor(regressors=models, meta_regressor=lasso)
# Score, then train, the stacking regressor.
print("stacking model")
score = cross_val_score(sclf, x_train, y_train, cv=5)
print(score.mean(), "+/-", score.std())

sclf.fit(x_train, y_train)
pred = sclf.predict(x_test)
print("loss is {}".format(mean_squared_error(y_test, pred)))
Пример #35
0
model = xgb.XGBRegressor(**other_params)
# Grid-search XGBoost hyperparameters by (negative) mean squared error.
mgb = GridSearchCV(estimator=model,
                   param_grid=cv_params,
                   scoring='neg_mean_squared_error',
                   cv=5,
                   verbose=1)
mgb.fit(train_X, train_Y)
print('参数的最佳取值:{0}'.format(mgb.best_params_))
print('最佳模型得分:{0}'.format(-mgb.best_score_))
myxgb = mgb.best_estimator_

# Baseline: evaluate the tuned XGBoost model alone.
stack = myxgb
stack.fit(train_X, train_Y)
Y_pred = stack.predict(test_X)
print(mean_squared_error(test_Y, Y_pred))

################################-- Model fusion --######################################
stack = StackingCVRegressor(regressors=[myxgb, mylgb],
                            meta_regressor=LinearRegression(),
                            use_features_in_secondary=True,
                            cv=5)

stack.fit(train_X, train_Y)
pred_Y = stack.predict(test_X)
print(mean_squared_error(test_Y, pred_Y))

# Predict the PCA-transformed submission set and write plain values.
Y_pred = stack.predict(X1_pca)
results = pd.DataFrame(Y_pred, columns=['target'])
results.to_csv("results.txt", index=False, header=False)
print("over")