def test_predictions_from_sparse_matrix():
    lr = LinearRegression()
    svr_lin = SVR(kernel='linear', gamma='auto')
    ridge = Ridge(random_state=1)
    stregr = StackingRegressor(regressors=[svr_lin, lr],
                               meta_regressor=ridge)

    # dense
    stregr.fit(X1, y)
    print(stregr.score(X1, y))
    assert round(stregr.score(X1, y), 2) == 0.61

    # sparse
    stregr.fit(sparse.csr_matrix(X1), y)
    print(stregr.score(X1, y))
    assert round(stregr.score(X1, y), 2) == 0.61
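The test relies on module-level fixtures X1 and y from the surrounding test file, so the hard-coded 0.61 is fixture-specific. A standalone harness with synthetic stand-ins (an assumption; it checks dense/sparse parity rather than the exact score):

import numpy as np
from scipy import sparse
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.svm import SVR
from mlxtend.regressor import StackingRegressor

# Synthetic stand-ins for the module-level fixtures X1, y
# (any small regression set works for a dense/sparse comparison;
# the 0.61 assertion only holds with the original fixtures).
rng = np.random.RandomState(1)
X1 = rng.rand(50, 3)
y = X1 @ np.array([1.0, 2.0, 3.0]) + 0.1 * rng.randn(50)

stregr = StackingRegressor(
    regressors=[SVR(kernel='linear', gamma='auto'), LinearRegression()],
    meta_regressor=Ridge(random_state=1))

stregr.fit(X1, y)
print('dense: ', stregr.score(X1, y))
stregr.fit(sparse.csr_matrix(X1), y)
print('sparse:', stregr.score(X1, y))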
svr_lin = SVR(kernel='linear')
ridge = Ridge(random_state=1)
svr_rbf = SVR(kernel='rbf')
stregr = StackingRegressor(regressors=[svr_lin, lr, ridge],
                           meta_regressor=svr_rbf)

# Training the stacking regressor
stregr.fit(X, y)
stregr.predict(X)

# Evaluate and visualize the fit
print("Mean Squared Error: %.4f" % np.mean((stregr.predict(X) - y) ** 2))
print('Variance Score: %.4f' % stregr.score(X, y))

with plt.style.context('seaborn-whitegrid'):
    plt.scatter(X, y, c='lightgray')
    plt.plot(X, stregr.predict(X), c='darkgreen', lw=2)
plt.show()

# Example 2 - Stacked Regression and GridSearch
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Lasso

# Initializing models (the snippet breaks off here; see the sketch below)
lr = LinearRegression()
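The Example 2 snippet is truncated after the first model is initialized. A sketch of how the grid search over the stacked regressor typically continues, following the pattern in the mlxtend documentation (the parameter grid is illustrative; valid key names can be listed with stregr.get_params().keys() and vary across mlxtend versions):

svr_lin = SVR(kernel='linear')
ridge = Ridge(random_state=1)
lasso = Lasso(random_state=1)
svr_rbf = SVR(kernel='rbf')
stregr = StackingRegressor(regressors=[svr_lin, lr, ridge, lasso],
                           meta_regressor=svr_rbf)

# Illustrative grid; check stregr.get_params().keys() for the exact names
params = {'lasso__alpha': [0.1, 1.0, 10.0],
          'ridge__alpha': [0.1, 1.0, 10.0]}

grid = GridSearchCV(estimator=stregr, param_grid=params, cv=5, refit=True)
grid.fit(X, y)
print(grid.best_params_, grid.best_score_)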
gbm_penetration_rate = lgb.LGBMRegressor(
    n_estimators=200,
    subsample_freq=1,
    subsample=0.8,
    colsample_bytree=0.8,
    learning_rate=0.05,
    max_depth=8,
    num_leaves=256,
    objective='xentropy',   # cross-entropy: expects targets in [0, 1]
    device='gpu',
)
# subsample_freq and num_leaves are LightGBM-only parameters and have been
# dropped from the XGBoost model, which would not accept them.
xgb_penetration_rate = xgb.XGBRegressor(
    n_estimators=200,
    subsample=0.7,
    colsample_bytree=0.7,
    learning_rate=0.1,
    max_depth=8,
    objective='reg:logistic',  # likewise expects targets in [0, 1]
    n_jobs=-1,
)

meta_reg = Ridge()
stregr = StackingRegressor(
    regressors=[gbm_penetration_rate, xgb_penetration_rate],
    meta_regressor=meta_reg)

stregr.fit(X_train, y_train[:, 0])
print(1 - stregr.score(X_val, y_val[:, 0]))  # 1 - R^2: unexplained variance
import numpy as np
import random as rn
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense, BatchNormalization
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor
from sklearn.model_selection import KFold
from mlxtend.regressor import StackingRegressor

# `sess` is assumed to be a seeded TF1 session created earlier in the script
K.set_session(sess)
np.random.seed(7)
rn.seed(7)

rf = RandomForestRegressor(n_estimators=54, max_depth=None, random_state=8)
ext = ExtraTreesRegressor(n_estimators=584, min_samples_split=2, random_state=8)

def create_model():
    model = Sequential()
    model.add(Dense(540, input_dim=8, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dense(1))
    model.compile(loss='mse', optimizer='adam', metrics=['mse', 'mae'])
    return model

nn = KerasRegressor(build_fn=create_model, epochs=32, batch_size=32, verbose=0)
clf = StackingRegressor(regressors=[nn, ext], meta_regressor=rf)

# kfold is not defined in the original snippet; this split is an assumption
kfold = KFold(n_splits=10, shuffle=True, random_state=7)
scores = []
for train, test in kfold.split(X, y):
    clf.fit(X[train], y[train])
    score = clf.score(X[test], y[test])
    print(score)
    scores.append(score)
# the scores are R^2 values, not percentages, so the stray %% was dropped
print("%.3f (+/- %.3f)" % (np.mean(scores), np.std(scores)))
XGBoost = grid_search(train_X, train_Y, xgb.XGBRegressor())
light_GBM = grid_search(train_X, train_Y, lgb.LGBMRegressor())

stacked_regression = StackingRegressor(
    regressors=[elastic_net, lasso, ridge, support_vector_regressor,
                XGBoost, light_GBM],
    meta_regressor=support_vector_regressor)
stacked_regression.fit(train_X, train_Y)
stacked = stacked_regression.predict(test_X)

# Blend weights sum to 1.0; expm1 inverts the (assumed) log1p-transformed
# target -- see the sketch below.
ensembled = np.expm1((0.1 * elastic_net.predict(test_X)) +
                     (0.2 * lasso.predict(test_X)) +
                     (0.1 * ridge.predict(test_X)) +
                     (0.1 * support_vector_regressor.predict(test_X)) +
                     (0.2 * XGBoost.predict(test_X)) +
                     (0.1 * light_GBM.predict(test_X)) +
                     (0.2 * stacked))
print(stacked_regression.score(train_X, train_Y))

"""
Export submission data
"""
submission = pd.DataFrame({
    'Id': test_X.index + (len(train_X_bf) - len(train_X) + 1),
    'SalePrice': ensembled
})
submission.to_csv('data/submission.csv', index=False)
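The np.expm1 inversion implies the models were trained on a log1p-transformed target, the usual treatment of the right-skewed SalePrice; a minimal sketch of the assumed preprocessing (train_df and its column are hypothetical names):

import numpy as np

# Assumed target transform: train in log space, invert with expm1 above.
train_Y = np.log1p(train_df['SalePrice'])  # train_df is hypothetical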
gbm_estimator = ensemble.GradientBoostingRegressor(random_state=100,
                                                   n_estimators=100,
                                                   max_features=3,
                                                   max_depth=5,
                                                   learning_rate=0.05)
stage1_models = [dt_estimator, rf_estimator, gbm_estimator]
stage2_model = tree.DecisionTreeRegressor(random_state=100)

stacked_model = StackingRegressor(regressors=stage1_models,
                                  meta_regressor=stage2_model)
stacked_model.fit(X_train1, y_train)
# StackingRegressor has no grid_scores_/best_params_/best_score_ attributes;
# those belong to GridSearchCV (see the sketch below), so the original print
# calls would raise AttributeError and were dropped. The duplicate fit call
# was also removed.
print(stacked_model.score(X_train1, y_train))
stacked_model.predict(X_train1)

X_test = total_data1[train_data.shape[0]:]
X_test.shape
X1 = X_test.select_dtypes(include=['number']).columns
X_test1 = X_test[X1]
X_test1.shape
X_test1.info()
X_test1 = X_test1.drop(['bought', 'sold', 'libor_rate', 'return'],
                       axis=1, inplace=False)
X_test1.shape
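The dropped attribute lookups suggest the author expected grid-search results from the stacked model; a sketch of wrapping it in GridSearchCV to obtain best_params_ and best_score_ (the parameter grid is illustrative; valid names can be listed with stacked_model.get_params().keys(), and cv_results_ replaces the long-removed grid_scores_):

from sklearn.model_selection import GridSearchCV

param_grid = {'meta_regressor__max_depth': [3, 5, None]}  # illustrative
grid = GridSearchCV(estimator=stacked_model, param_grid=param_grid, cv=5)
grid.fit(X_train1, y_train)
print(grid.best_params_)
print(grid.best_score_)
print(grid.cv_results_['mean_test_score'])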
# `mgt` is a project-local helper module; see the changepath sketch below.
# This load block was scrambled to the end of the snippet and has been
# restored to its logical position before the model definitions.
with mgt.changepath(path):
    npzfile = np.load('mydata.npz')
    X_train, X_test = npzfile['X_train'], npzfile['X_test']
    y_train, y_test = npzfile['y_train'], npzfile['y_test']

# Create the first-layer models
reg_dt = DecisionTreeRegressor(min_samples_leaf=11,
                               min_samples_split=33,
                               random_state=500)
# normalize= was removed in scikit-learn 1.2; use a StandardScaler pipeline there
reg_lr = LinearRegression(normalize=True)
reg_ridge = Ridge(random_state=500)

# Instantiate the 2nd-layer regressor
reg_meta = LinearRegression()

# Build the Stacking regressor
reg_stack = StackingRegressor(regressors=[reg_dt, reg_lr, reg_ridge],
                              meta_regressor=reg_meta)
reg_stack.fit(X_train, y_train)

# Evaluate the performance on the test set using the MAE metric
pred = reg_stack.predict(X_test)
stacks = reg_stack.score(X_test, y_test)
rmseS = np.sqrt(mean_squared_error(y_test, pred))
print('MAE: {:.3f}'.format(mean_absolute_error(y_test, pred)))
# .score() on a regressor is R^2, not accuracy; the original also passed an
# undefined **sp to print, which was dropped here
print('RMSE (Stacking): {:.3f}, R^2 Score: {:.2f}'.format(rmseS, stacks))
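mgt.changepath is a project-local helper that isn't shown; a sketch of what it presumably does, assuming from the name and usage that it temporarily switches the working directory:

import os
from contextlib import contextmanager

@contextmanager
def changepath(path):
    # Temporarily chdir into `path`, restoring the previous cwd on exit.
    old_cwd = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(old_cwd)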
# The head of this call was cut off in the snippet; `reg` is the
# xgboost.XGBRegressor trained just above it in the source.
reg.fit(X_train, y_train,
        eval_set=[(X_train, y_train), (X_test, y_test)],
        early_stopping_rounds=50,  # stop after 50 consecutive rounds without error decrease
        verbose=False)  # change verbose to True if you want to watch it train

predict = reg.predict(X_test)
print("R-squared value: ", reg.score(X_test, y_test))
# Best R-squared value I have so far is: 0.9866523109744227
xgb.plot_importance(reg)
plt.show()

lr = LinearRegression()
# Note: `reg` serves both as a base learner and as the meta-regressor here
stregr = StackingRegressor(regressors=[lr, reg], meta_regressor=reg)
stregr = stregr.fit(X_train, y_train)
print('Variance Score: %.4f' % stregr.score(X_train, y_train))
dump(stregr, 'classical_model.joblib')

# RFE Classical Model
estimator = reg
# newer scikit-learn requires n_features_to_select as a keyword argument
selector = RFE(estimator, n_features_to_select=3, step=1)
selector = selector.fit(X, y)
print("Feature Ranking: ", selector.ranking_)

# TODO: Serialize the classical model so that it doesn't retrain,
# then load it up and try a date from the training data to see if it works.
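Following up on the snippet's TODO, a sketch of reloading the serialized stacked model for inference without retraining (the filename matches the dump call above):

from joblib import load

stregr_loaded = load('classical_model.joblib')  # restore the persisted model
print(stregr_loaded.predict(X_test[:5]))        # predict without refitting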
####### support vector ########
from sklearn.svm import SVR

regressor_s = SVR(kernel='linear')
regressor_s.fit(X_train, y_train)

##### stacking #####
from mlxtend.regressor import StackingRegressor

# `regressor`, `GBoost`, and `regressor_r` are base models defined earlier;
# the meta_regressor should be an XGBRegressor instance, not the xgboost
# module that the name `xgb` usually refers to.
stregr = StackingRegressor(regressors=[regressor, GBoost, regressor_r],
                           meta_regressor=xgb)
stregr.fit(X_train, y_train)
y_pred = stregr.predict(X_test)
print('RMSE for Stacked Regression is {:.4f}'.format(
    sqrt(mean_squared_error(y_test, y_pred))))
print('R^2 for Stacked Regression is {:.4f}'.format(stregr.score(X_test, y_test)))

model = Sequential()
model.add(Dense(200, input_dim=220, kernel_initializer='normal', activation='relu'))
model.add(Dense(1, kernel_initializer='normal'))
# Mean squared error loss
model.compile(loss='mean_squared_error', optimizer=keras.optimizers.Adadelta())
model.fit(X_train, y_train, validation_data=(X_test, y_test),
          epochs=66, batch_size=32, verbose=2)
y_pred = model.predict(X_test)
print('RMSE for Neural Network is {:.4f}'.format(
    sqrt(mean_squared_error(y_test, y_pred))))
# Keras models have no .score(); evaluate() returns the compiled loss instead
print(model.evaluate(X_test, y_test, verbose=0))

plt.style.use('ggplot')
plt.plot(y_pred, y_test, 'ro')
plt.xlabel('Predictions', fontsize=15)
plt.ylabel('Reality', fontsize=15)
plt.title('Predictions x Reality on dataset Test', fontsize=15)
plt.show()