def stacking(X, y, k_cv):
    """Evaluate a stacking regressor with k-fold cross-validation.

    Base learners are kernel ridge, SVR and elastic net; a linear
    regression combines their predictions.  For each fold the model is
    refit on the training portion and scored on the held-out portion.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
    y : ndarray of shape (n_samples,)
    k_cv : int
        Number of cross-validation folds.

    Returns
    -------
    list of float
        Per-fold test "accuracy" values (sqrt of the r_2 score,
        matching the original reporting convention).
    """
    estimators = [
        ('krr', KernelRidge(kernel="cosine", alpha=0.001)),
        ('svr', SVR(C=2000, gamma=0.001)),
        ("enet", ElasticNet(alpha=0.00001, l1_ratio=0.0005, max_iter=10000)),
    ]
    reg = StackingRegressor(estimators=estimators, n_jobs=15,
                            final_estimator=LinearRegression())
    kfold = KFold(n_splits=k_cv, shuffle=True, random_state=0)

    res = []
    # Iterate the splitter directly instead of calling __next__() by hand
    # (KFold only uses the length of its argument, so splitting on X and
    # on y yields identical index sets).
    for trainval_index, test_index in kfold.split(X):
        X_trainval = X[trainval_index, :]
        y_trainval = y[trainval_index]
        X_test = X[test_index, :]
        y_test = y[test_index]

        reg.fit(X_trainval, y_trainval)
        print((reg.score(X_trainval, y_trainval)) ** 0.5)

        test_pre = reg.predict(X_test)
        # Compute the fold metric once instead of three times.
        fold_acc = r_2(y_test, test_pre) ** 0.5
        print("accuracy: ", fold_acc)
        res.append(fold_acc)

    # Single summary line (the original printed it twice, with a typo).
    print("mean accuracy: ", np.array(res).mean())
    return res
def stacking_qtlmas(X_trainval, y_trainval, X_test, y_test):
    """Fit a stacking ensemble on the training split and return test predictions.

    Base learners are kernel ridge (cosine kernel), SVR and elastic net,
    combined by a linear-regression meta-learner.  The training-set
    sqrt(R^2) is printed as a progress/diagnostic line.

    Parameters
    ----------
    X_trainval, y_trainval : training features and targets.
    X_test : features to predict.
    y_test : unused here; kept for signature compatibility with callers.

    Returns
    -------
    ndarray
        Predictions for ``X_test``.
    """
    base_learners = [
        ('krr', KernelRidge(kernel="cosine", alpha=0.005)),
        ('svr', SVR(C=2500, gamma=0.001)),
        ("enet", ElasticNet(alpha=0.00001, l1_ratio=0.0005, max_iter=10000)),
    ]
    stack = StackingRegressor(estimators=base_learners, n_jobs=15,
                              final_estimator=LinearRegression())
    stack.fit(X_trainval, y_trainval)

    # Diagnostic: sqrt of the training-set R^2.
    print((stack.score(X_trainval, y_trainval)) ** 0.5)

    return stack.predict(X_test)
# Test-set MSE of the stacking regressor.
# NOTE(review): stack_train_mse / stack_val_mse / stack_test_pred / ytest are
# computed earlier in the file (outside this chunk) — verify they exist.
stack_test_mse = mean_squared_error(ytest, stack_test_pred)
# Report train / validation / test RMSE on one tab-separated line.
print("RMSE using StackRegressor:\t{}\t{}\t{}\n".format(
    np.sqrt(stack_train_mse), np.sqrt(stack_val_mse), np.sqrt(stack_test_mse)))

# Plot actual vs. predicted rainfall for the first 300 test samples.
df_rf = pd.DataFrame({'Actual': ytest, 'Predicted': stack_test_pred})
fig1 = pp.figure(figsize=(8, 6))
df_rf.head(n=300).plot()
pp.legend()
pp.title("StackRegressor Actual v/s Predicted Annual Rainfall")
pp.xticks(())  # hide tick labels on both axes
pp.yticks(())
pp.show()

# Compare R^2 of the random forest, elastic net and stacking models
# (rfreg, elastic and stack are defined earlier in the file).
print(rfreg.score(Xtest, ytest), elastic.score(Xtest, ytest), stack.score(Xtest, ytest))

# CLASSIFICATION & CLUSTERING METHODS
# Data splitting
# Seasonal rainfall columns as features; REGION as the class label.
X = np.array(raindf[['JAN-FEB', 'MAR-MAY', 'JUN-SEP', 'OCT-DEC']])
y = np.array(raindf['REGION'])
# Encode region names to integer labels.
# NOTE(review): fit() followed by fit_transform() fits the encoder twice;
# the result is the same but one call is redundant.
le = preprocessing.LabelEncoder()
le.fit(y)
y = le.fit_transform(y)
# Project the 4 seasonal features onto 2 principal components.
Xreduced = PCA(n_components=2).fit_transform(X)
results = pd.DataFrame(Xreduced, columns=['pca1', 'pca2'])
# Train/test split of the PCA-reduced data
# (statement continues past the end of this chunk).
Xtrain, Xtest, ytrain, ytest = model_selection.train_test_split(Xreduced,
# Step 1: load the data.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed
# in 1.2 — this script requires an older scikit-learn release.
X, y = load_boston(return_X_y=True)

# Step 2: hold out 20% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=40)

# Step 3: train a stacking ensemble — KNN and gradient boosting as base
# learners, a Ridge model as the meta-learner, with internal 5-fold CV.
regression = StackingRegressor(
    estimators=[
        ('knn', KNeighborsRegressor(n_neighbors=4, weights='distance',
                                    leaf_size=1, metric='manhattan')),
        ('dt', GradientBoostingRegressor(max_depth=3, n_estimators=220)),
    ],
    final_estimator=Ridge(random_state=40),
    cv=5,
    n_jobs=-1,
)
regression.fit(X_train, y_train)

# Evaluate on both splits: R^2 via score(), RMSE via mean squared error.
score_train = regression.score(X_train, y_train)
score_test = regression.score(X_test, y_test)
pred_train = regression.predict(X_train)
pred_test = regression.predict(X_test)
rmse_train = np.sqrt(metrics.mean_squared_error(pred_train, y_train))
rmse_test = np.sqrt(metrics.mean_squared_error(pred_test, y_test))
print(f'RMSE:{rmse_train:.2f}/{rmse_test:.2f}')
print(f'R2Score:{score_train:.2f}/{score_test:.2f}')
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# Load the Boston housing data and split it (default 75/25 split).
X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# `reg` is a stacking regressor defined earlier in the file.
reg.fit(X_train, y_train)
y_pred = reg.predict(X_test)

# Visual check: first 30 actual vs. predicted targets.
plt.figure()
plt.plot(y_test[:30], 'gd', label='Original')
plt.plot(y_pred[:30], 'b^', label='Stacking Regressor')
plt.show()

print('R2 score: {:.2f}'.format(r2_score(y_test, y_pred)))

# For multiple stacking layers: the first layer's linear/SVR predictions
# feed a second stacking layer of tree ensembles topped with RidgeCV.
final_layer = StackingRegressor(
    estimators=[
        ('rf', RandomForestRegressor(random_state=42)),
        ('gbrt', GradientBoostingRegressor(random_state=42)),
    ],
    final_estimator=RidgeCV(),
)
multi_layer_regressor = StackingRegressor(
    estimators=[
        ('ridge', RidgeCV()),
        ('lasso', LassoCV(random_state=42)),
        ('svr', SVR(C=1, gamma=1e-6, kernel='rbf')),
    ],
    final_estimator=final_layer,
)
multi_layer_regressor.fit(X_train, y_train)
print('R2 score: {:.2f}'.format(multi_layer_regressor.score(X_test, y_test)))
# enable_hist_gradient_boosting is required on older scikit-learn releases
# to expose HistGradientBoostingRegressor (a no-op on >=1.0).
from sklearn.experimental import enable_hist_gradient_boosting  # noqa: F401
from sklearn.ensemble import RandomForestRegressor, HistGradientBoostingRegressor, StackingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import RidgeCV

# Load the engineered feature set and drop rows with extreme scores.
dataset = pd.read_pickle("data/final_dataset.pkl")
dataset = dataset[dataset.score < 5]
X = dataset.drop(["score", "author_id", "tweet_id"], axis=1, errors="ignore").to_numpy()
y = dataset[["score"]].values.ravel()

# Train a Stacking regressor
print("#################### STACKING ####################")
estimators = [
    ("Random Forest", RandomForestRegressor(random_state=14, n_jobs=-1, max_depth=13)),
    ("Gradient Boosting", HistGradientBoostingRegressor(random_state=14, l2_regularization=1)),
]
model = StackingRegressor(estimators=estimators, final_estimator=RidgeCV(), n_jobs=-1)
model.fit(X, y)

# Print the evaluation results.
# Fix: a regressor's score() is R^2, not MSE — the original label was wrong.
# NOTE(review): this evaluates on the training data; use a held-out split
# for an honest performance estimate.
print("R2 (train)", model.score(X, y))  # 0.6033506428465925
print(model)
print()

# Fix: close the file handle deterministically instead of leaking it.
with open("data/final_best_model_stacking.pkl", 'wb') as f:
    pickle.dump(model, f)