def bo_tpe_knn(X, y):
    """Tune ``KNeighborsRegressor`` with hyperopt's TPE algorithm.

    Runs 10 evaluations over ``n_neighbors`` (quantized uniform on [1, 20]),
    scoring each candidate by the mean 3-fold cross-validated MSE, prints a
    summary and passes the raw ``fmin`` result to ``save_model_object``.

    Args:
        X: Feature matrix forwarded to ``cross_val_score``.
        y: Regression targets forwarded to ``cross_val_score``.

    Returns:
        Tuple ``(best_mse, elapsed, best)``: the lowest loss over all trials,
        the elapsed ``datetime.timedelta`` and the dict returned by ``fmin``.
    """
    started_at = datetime.datetime.now()

    def objective(sampled):
        # Coerce the sampled value to a non-negative integer before use.
        n_neighbors = abs(int(sampled['n_neighbors']))
        model = KNeighborsRegressor(n_neighbors=n_neighbors)
        fold_scores = cross_val_score(
            model, X, y, cv=3, n_jobs=-1,
            scoring="neg_mean_squared_error")
        # The scorer is negated MSE, so flip the sign to get a loss.
        return {'loss': -np.mean(fold_scores), 'status': STATUS_OK}

    search_space = {
        'n_neighbors': hp.quniform('n_neighbors', 1, 20, 1),
    }
    trials_knn = Trials()
    best_knn = fmin(fn=objective,
                    space=search_space,
                    algo=tpe.suggest,
                    max_evals=10,
                    trials=trials_knn)

    best_loss = min(trials_knn.losses())
    print("KNN MSE score:%.4f" % best_loss)
    process_time_knn = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(process_time_knn))
    print("最佳超参数值集合:", best_knn)
    save_model_object(best_knn, 'BO-TPE', 'KNN', 'KNN')
    return best_loss, process_time_knn, best_knn
def optuna_xgb(X, y):
    """Tune ``xgb.XGBRegressor`` with an Optuna study (10 trials).

    The search covers ``learning_rate`` (log scale), ``max_depth`` and
    ``n_estimators``; each trial is scored by the mean 3-fold
    cross-validated MSE.

    References:
        https://xgboost.readthedocs.io/en/latest/tutorials/rf.html
        https://data-analysis-stats.jp/%e6%a9%9f%e6%a2%b0%e5%ad%a6%e7%bf%92/python%e3%81%a7xgboost/

    Args:
        X: Feature matrix forwarded to ``cross_val_score``.
        y: Regression targets forwarded to ``cross_val_score``.

    Returns:
        Tuple ``(best_mse, elapsed, best_params)`` where ``elapsed`` is a
        ``"h:mm:ss.ffffff"`` string.
    """

    def objective(trial):
        params = {
            'learning_rate':
            trial.suggest_float("learning_rate", 1e-4, 1, log=True),
            'max_depth':
            trial.suggest_int("max_depth", 1, 46, step=5),
            # Fix: this was registered under the name "n_neighbors", so
            # study.best_params reported a key XGBoost does not have.
            'n_estimators':
            trial.suggest_int("n_estimators", 100, 220, step=30),
            'objective': 'reg:squarederror',
        }
        clf = xgb.XGBRegressor(**params)
        fold_scores = cross_val_score(
            clf, X, y, cv=3, scoring="neg_mean_squared_error")
        # The scorer is negated MSE, so flip the sign to get a loss.
        return -np.mean(fold_scores)

    study_xgb = optuna.create_study(direction="minimize")
    study_xgb.optimize(objective, n_trials=10)
    optuna_xgb_mse_score = study_xgb.best_value

    # NOTE(review): this is the duration of the best trial only, not of the
    # whole study -- confirm that is what callers want to compare.
    best_trial = study_xgb.best_trial
    elapsed_seconds = (best_trial.datetime_complete -
                       best_trial.datetime_start).total_seconds()
    # Convert seconds to an h:mm:ss string.
    minutes, seconds = divmod(elapsed_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    optuna_xgb_time = "%d:%02d:%09f" % (hours, minutes, seconds)

    print("XGBoost MSE score:%.4f" % optuna_xgb_mse_score)
    print("程序执行时间(秒):{}".format(optuna_xgb_time))
    print("最佳超参数值集合:", study_xgb.best_params)
    # Fix: the labels were copy-pasted ('BO-TPE', 'NGBoost'), identical to
    # the ones the LightGBM tuner passes for its own model.
    save_model_object(study_xgb, 'Optuna', 'XGBoost', 'XGBoost')
    return optuna_xgb_mse_score, optuna_xgb_time, study_xgb.best_params
def optuna_svr(X, y):
    """Tune ``SVR`` with an Optuna study (5 trials).

    The search covers ``kernel`` and ``C`` (log scale); ``degree`` and
    ``epsilon`` are not tuned. Each trial is scored by the mean 3-fold
    cross-validated MSE.

    Args:
        X: Feature matrix forwarded to ``cross_val_score``.
        y: Regression targets forwarded to ``cross_val_score``.

    Returns:
        Tuple ``(best_mse, elapsed, best_params)`` where ``elapsed`` is a
        ``"h:mm:ss.ffffff"`` string.
    """

    def objective(trial):
        params = {
            "kernel":
            trial.suggest_categorical("kernel", ["linear", "poly", "rbf"]),
            # suggest_float(log=True) replaces the deprecated
            # suggest_loguniform and samples from the same distribution.
            "C":
            trial.suggest_float("C", 1e-5, 1e2, log=True),
        }
        clf = SVR(**params, gamma="scale")
        fold_scores = cross_val_score(
            clf, X, y, cv=3, scoring="neg_mean_squared_error")
        # The scorer is negated MSE, so flip the sign to get a loss.
        return -np.mean(fold_scores)

    study_svr = optuna.create_study(direction="minimize")
    study_svr.optimize(objective, n_trials=5)
    optuna_svr_mse_score = study_svr.best_value

    # NOTE(review): this is the duration of the best trial only, not of the
    # whole study -- confirm that is what callers want to compare.
    best_trial = study_svr.best_trial
    elapsed_seconds = (best_trial.datetime_complete -
                       best_trial.datetime_start).total_seconds()
    # Convert seconds to an h:mm:ss string.
    minutes, seconds = divmod(elapsed_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    optuna_svr_time = "%d:%02d:%09f" % (hours, minutes, seconds)

    print("SVR MSE score:%.4f" % optuna_svr_mse_score)
    print("程序执行时间(秒):{}".format(optuna_svr_time))
    print("最佳超参数值集合:", study_svr.best_params)
    save_model_object(study_svr, 'Optuna', 'SVR', 'SVR')
    return optuna_svr_mse_score, optuna_svr_time, study_svr.best_params
def bo_tpe_svr(X, y):
    """Tune ``SVR`` with hyperopt's TPE algorithm (20 evaluations).

    ``C`` and ``epsilon`` are sampled from zero-centred normal distributions
    and made non-negative with ``abs`` before use; ``kernel`` is chosen from
    poly/rbf/sigmoid. Each candidate is scored by the mean 3-fold
    cross-validated MSE.

    NOTE(review): the dict returned by ``fmin`` is printed, saved and
    returned as-is. It presumably holds hyperopt's raw values (the index of
    the ``hp.choice`` option and the un-``abs``'d draws) rather than the
    values actually given to ``SVR`` -- confirm against callers.

    Args:
        X: Feature matrix forwarded to ``cross_val_score``.
        y: Regression targets forwarded to ``cross_val_score``.

    Returns:
        Tuple ``(best_mse, elapsed, best)``: the lowest loss over all trials,
        the elapsed ``datetime.timedelta`` and the dict returned by ``fmin``.
    """
    started_at = datetime.datetime.now()

    def objective(sampled):
        model = SVR(
            gamma='scale',
            C=abs(float(sampled['C'])),
            kernel=str(sampled['kernel']),
            epsilon=abs(float(sampled['epsilon'])),
        )
        fold_scores = cross_val_score(
            model, X, y, cv=3, n_jobs=-1,
            scoring="neg_mean_squared_error")
        # The scorer is negated MSE, so flip the sign to get a loss.
        return {'loss': -np.mean(fold_scores), 'status': STATUS_OK}

    search_space = {
        'C': hp.normal('C', 0, 50),
        "kernel": hp.choice('kernel', ['poly', 'rbf', 'sigmoid']),
        'epsilon': hp.normal('epsilon', 0, 1),
    }
    trials_svr = Trials()
    best_svr = fmin(fn=objective,
                    space=search_space,
                    algo=tpe.suggest,
                    max_evals=20,
                    trials=trials_svr)

    best_loss = min(trials_svr.losses())
    print("SVM MSE score:%.4f" % best_loss)
    process_time_svr = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(process_time_svr))
    print("最佳超参数值集合:", best_svr)
    save_model_object(best_svr, 'BO-TPE', 'SVR', 'SVR')
    return best_loss, process_time_svr, best_svr
def bo_ANN(X, y):
    """Tune the Keras ``ANN`` builder with ``BayesSearchCV`` (10 iterations).

    Uses 3-fold cross-validation with negated MSE as the score. After the
    search, a model is built by calling ``ANN`` with the best parameters and
    that object is passed to ``save_model_object``.

    Args:
        X: Feature matrix passed to ``BayesSearchCV.fit``.
        y: Regression targets passed to ``BayesSearchCV.fit``.

    Returns:
        Tuple ``(best_mse, elapsed, best_params)`` where ``best_mse`` is a
        string and ``elapsed`` is a ``datetime.timedelta``.
    """
    search_space = {
        'activation': ['relu', 'tanh'],
        'loss': ['mse'],
        'batch_size': [32, 64, 128],
        'neurons': Integer(256, 1024),
        'epochs': [20, 30, 50, 60]
    }
    started_at = datetime.datetime.now()

    # `verbose` is a name defined outside this function.
    estimator = KerasRegressor(build_fn=ANN, verbose=verbose)
    Bayes_ann = BayesSearchCV(estimator,
                              search_space,
                              cv=3,
                              n_iter=10,
                              scoring='neg_mean_squared_error')
    Bayes_ann.fit(X, y)

    # best_score_ is negated MSE; flip the sign for reporting.
    print("ANN MSE score:" + str(-Bayes_ann.best_score_))
    process_time_ann = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(process_time_ann))
    print("最佳超参数值集合:", Bayes_ann.best_params_)

    model_bo_ann = ANN(**Bayes_ann.best_params_)
    save_model_object(model_bo_ann, 'BO-GP', 'ANN', 'ANN')
    best_mse_text = str(-Bayes_ann.best_score_)
    return best_mse_text, process_time_ann, Bayes_ann.best_params_
def bo_RandomForestRegressor(X, y):
    """Tune ``RandomForestRegressor`` with ``BayesSearchCV`` (20 iterations).

    Uses 3-fold cross-validation with negated MSE as the score; the fitted
    search object is passed to ``save_model_object``.

    Args:
        X: Feature matrix passed to ``BayesSearchCV.fit``.
        y: Regression targets passed to ``BayesSearchCV.fit``.

    Returns:
        Tuple ``(best_mse, elapsed, best_params)`` where ``best_mse`` is a
        string and ``elapsed`` is a ``datetime.timedelta``.
    """
    # Hyperparameter configuration space.
    search_space = {
        'n_estimators': Integer(10, 100),
        "max_features": Integer(1, 13),
        'max_depth': Integer(5, 50),
        "min_samples_split": Integer(2, 11),
        "min_samples_leaf": Integer(1, 11),
        "criterion": ['mse', 'mae']
    }
    started_at = datetime.datetime.now()

    forest = RandomForestRegressor(random_state=0)
    # The iteration count is 20; raise it if time permits.
    Bayes_rf = BayesSearchCV(forest,
                             search_space,
                             cv=3,
                             n_iter=20,
                             scoring='neg_mean_squared_error')
    Bayes_rf.fit(X, y)

    # best_score_ is negated MSE; flip the sign for reporting.
    print("RandomForestRegressor MSE score:" + str(-Bayes_rf.best_score_))
    process_time_rf = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(process_time_rf))
    print("最佳超参数值集合:", Bayes_rf.best_params_)
    save_model_object(Bayes_rf, 'BO-GP', 'RandomForestRegressor',
                      'RandomForestRegressor')
    best_mse_text = str(-Bayes_rf.best_score_)
    return best_mse_text, process_time_rf, Bayes_rf.best_params_
def gpminimize_RandomForestRegressor(X, y):
    """Tune ``RandomForestRegressor`` with ``gp_minimize`` (20 calls).

    A single estimator instance is reconfigured through ``set_params`` for
    every candidate and scored by the mean 3-fold cross-validated MSE. The
    ``models`` attribute of the optimisation result is passed to
    ``save_model_object``.

    Args:
        X: Feature matrix forwarded to ``cross_val_score``.
        y: Regression targets forwarded to ``cross_val_score``.

    Returns:
        Tuple ``(best_mse, elapsed, best_x)``: the minimum objective value,
        the elapsed ``datetime.timedelta`` and the result's ``x`` attribute.
    """
    started_at = datetime.datetime.now()
    forest = RandomForestRegressor()

    # Hyperparameter configuration space.
    dimensions = [
        Integer(10, 100, name='n_estimators'),
        Integer(5, 50, name='max_depth'),
        Integer(1, 13, name='max_features'),
        Integer(2, 11, name='min_samples_split'),
        Integer(1, 11, name='min_samples_leaf'),
        Categorical(['mse', 'mae'], name='criterion')
    ]

    @use_named_args(dimensions)
    def objective(**params):
        forest.set_params(**params)
        fold_scores = cross_val_score(
            forest, X, y, cv=3, n_jobs=-1,
            scoring="neg_mean_squared_error")
        # The scorer is negated MSE, so flip the sign to get a loss.
        return -np.mean(fold_scores)

    # The call count is 20; raise it if time permits.
    res_gp_rf = gp_minimize(objective, dimensions, n_calls=20,
                            random_state=0)

    print("RandomForestRegressor MSE score:%.4f" % res_gp_rf.fun)
    process_time_rf = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(process_time_rf))
    print("最佳超参数值集合:", res_gp_rf.x)
    save_model_object(res_gp_rf.models, 'gp_minimize',
                      'RandomForestRegressor', 'RandomForestRegressor')
    return res_gp_rf.fun, process_time_rf, res_gp_rf.x
def rs_ANN(X, y):
    """Tune the Keras ``ANN`` builder with ``RandomizedSearchCV``.

    Draws 10 candidates, using 3-fold cross-validation with negated MSE as
    the score. After the search, a model is built by calling ``ANN`` with
    the best parameters and that object is passed to ``save_model_object``.

    Args:
        X: Feature matrix passed to ``RandomizedSearchCV.fit``.
        y: Regression targets passed to ``RandomizedSearchCV.fit``.

    Returns:
        Tuple ``(best_mse, elapsed, best_params)`` where ``best_mse`` is a
        string and ``elapsed`` is a ``datetime.timedelta``.
    """
    distributions = {
        'activation': ['relu', 'tanh'],
        'loss': ['mse'],
        'batch_size': [32, 64, 128],
        'neurons': sp_randint(256, 1024),
        'epochs': [30, 50, 80]
    }
    candidate_count = 10
    started_at = datetime.datetime.now()

    # `verbose` is a name defined outside this function.
    estimator = KerasRegressor(build_fn=ANN, verbose=verbose)
    Random_ann = RandomizedSearchCV(estimator,
                                    param_distributions=distributions,
                                    n_iter=candidate_count,
                                    cv=3,
                                    scoring='neg_mean_squared_error')
    Random_ann.fit(X, y)

    # best_score_ is negated MSE; flip the sign for reporting.
    print("ANN MSE score:" + str(-Random_ann.best_score_))
    process_time_ann = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(process_time_ann))
    print("最佳超参数值集合:", Random_ann.best_params_)

    model_random_ann = ANN(**Random_ann.best_params_)
    save_model_object(model_random_ann, 'random_search', 'ANN', 'ANN')
    best_mse_text = str(-Random_ann.best_score_)
    return best_mse_text, process_time_ann, Random_ann.best_params_
def grid_RandomForestRegressor(X, y):
    """Tune ``RandomForestRegressor`` with an exhaustive ``GridSearchCV``.

    Uses 3-fold cross-validation with negated MSE as the score; the fitted
    search object is passed to ``save_model_object``.

    Args:
        X: Feature matrix passed to ``GridSearchCV.fit``.
        y: Regression targets passed to ``GridSearchCV.fit``.

    Returns:
        Tuple ``(best_mse, elapsed, best_params)`` where ``best_mse`` is a
        string and ``elapsed`` is a ``datetime.timedelta``.
    """
    # Hyperparameter configuration space.
    param_grid = {
        'n_estimators': [10, 20, 30],
        'max_features': ['sqrt', 0.5],
        'max_depth': [15, 20, 30, 50],
        'min_samples_leaf': [1, 2, 4, 8],
        "bootstrap": [True, False],
        "criterion": ['mse', 'mae']
    }
    started_at = datetime.datetime.now()

    forest = RandomForestRegressor(random_state=0)
    grid_rf = GridSearchCV(forest,
                           param_grid,
                           cv=3,
                           scoring='neg_mean_squared_error')
    grid_rf.fit(X, y)

    print(grid_rf.best_params_)
    # best_score_ is negated MSE; flip the sign for reporting.
    print("RandomForestRegressor MSE score:" + str(-grid_rf.best_score_))
    process_time_rf = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(process_time_rf))
    print("最佳超参数值集合:", grid_rf.best_params_)
    save_model_object(grid_rf, 'grid_search', 'RandomForestRegressor',
                      'RandomForestRegressor')
    best_mse_text = str(-grid_rf.best_score_)
    return best_mse_text, process_time_rf, grid_rf.best_params_
def optuna_RandomForestRegressor(X, y):
    """Tune ``RandomForestRegressor`` with an Optuna study (5 trials).

    The search covers ``n_estimators`` and ``max_depth``; each trial is
    scored by the mean 3-fold cross-validated MSE. The study object is
    passed to ``save_model_object``.

    Args:
        X: Feature matrix forwarded to ``cross_val_score``.
        y: Regression targets forwarded to ``cross_val_score``.

    Returns:
        Tuple ``(best_mse, elapsed, best_params)`` where ``elapsed`` is a
        ``"h:mm:ss.ffffff"`` string built from the best trial's start and
        completion timestamps.
    """

    def objective(trial):
        forest = RandomForestRegressor(
            n_estimators=trial.suggest_int("n_estimators", 1, 100),
            max_depth=trial.suggest_int("max_depth", 1, 20),
            random_state=0)
        fold_scores = cross_val_score(
            forest, X, y, cv=3, scoring="neg_mean_squared_error")
        # The scorer is negated MSE, so flip the sign to get a loss.
        return -np.mean(fold_scores)

    # The goal is the lowest MSE, hence direction="minimize".
    study_rf = optuna.create_study(direction="minimize")
    study_rf.optimize(objective, n_trials=5)
    optuna_rf_mse_score = study_rf.best_value

    best_trial = study_rf.best_trial
    elapsed_seconds = (best_trial.datetime_complete -
                       best_trial.datetime_start).total_seconds()
    # Convert seconds to an h:mm:ss string.
    minutes, seconds = divmod(elapsed_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    optuna_rf_time = "%d:%02d:%09f" % (hours, minutes, seconds)

    print("RandomForestRegressor MSE score:%.4f" % optuna_rf_mse_score)
    print("程序执行时间(秒):{}".format(optuna_rf_time))
    print("最佳超参数值集合:", study_rf.best_params)
    save_model_object(study_rf, 'Optuna', 'RandomForestRegressor',
                      'RandomForestRegressor')
    return optuna_rf_mse_score, optuna_rf_time, study_rf.best_params
def rs_RandomForestRegressor(X, y):
    """Tune ``RandomForestRegressor`` with ``RandomizedSearchCV``.

    Draws 20 candidates, using 3-fold cross-validation with negated MSE as
    the score; the fitted search object is passed to ``save_model_object``.

    Args:
        X: Feature matrix passed to ``RandomizedSearchCV.fit``.
        y: Regression targets passed to ``RandomizedSearchCV.fit``.

    Returns:
        Tuple ``(best_mse, elapsed, best_params)`` where ``best_mse`` is a
        string and ``elapsed`` is a ``datetime.timedelta``.
    """
    # Hyperparameter configuration space.
    distributions = {
        'n_estimators': sp_randint(10, 100),
        "max_features": sp_randint(1, 13),
        'max_depth': sp_randint(5, 50),
        "min_samples_split": sp_randint(2, 11),
        "min_samples_leaf": sp_randint(1, 11),
        "criterion": ['mse', 'mae']
    }
    # The iteration count is 20; raise it if time permits.
    candidate_count = 20
    started_at = datetime.datetime.now()

    forest = RandomForestRegressor(random_state=0)
    Random_rf = RandomizedSearchCV(forest,
                                   param_distributions=distributions,
                                   n_iter=candidate_count,
                                   cv=3,
                                   scoring='neg_mean_squared_error')
    Random_rf.fit(X, y)

    # best_score_ is negated MSE; flip the sign for reporting.
    print("RandomForestRegressor MSE score:" + str(-Random_rf.best_score_))
    process_time_rf = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(process_time_rf))
    print("最佳超参数值集合:", Random_rf.best_params_)
    save_model_object(Random_rf, 'random_search', 'RandomForestRegressor',
                      'RandomForestRegressor')
    best_mse_text = str(-Random_rf.best_score_)
    return best_mse_text, process_time_rf, Random_rf.best_params_
def bo_tpe_ANN(X, y):
    """Tune the Keras ``ANN`` builder with hyperopt's TPE (10 evaluations).

    Each candidate is wrapped in a ``KerasRegressor`` and scored by the mean
    3-fold cross-validated MSE. Afterwards the ``hp.choice`` entries of the
    ``fmin`` result are mapped back to their labels, a model is built with
    ``ANN`` from those decoded values and passed to ``save_model_object``.

    Args:
        X: Feature matrix forwarded to ``cross_val_score``.
        y: Regression targets forwarded to ``cross_val_score``.

    Returns:
        Tuple ``(best_mse, elapsed, best)``: the lowest loss over all trials,
        the elapsed ``datetime.timedelta`` and the undecoded dict returned
        by ``fmin``.
    """
    started_at = datetime.datetime.now()

    def objective(sampled):
        build_kwargs = {
            "activation": str(sampled['activation']),
            "loss": str(sampled['loss']),
        }
        # Integer-valued settings are coerced to non-negative ints.
        for key in ('batch_size', 'neurons', 'epochs'):
            build_kwargs[key] = abs(int(sampled[key]))
        build_kwargs['learning_rate'] = abs(float(sampled['learning_rate']))

        # `verbose` is a name defined outside this function.
        model = KerasRegressor(build_fn=ANN, **build_kwargs, verbose=verbose)
        fold_scores = cross_val_score(
            model, X, y, cv=3, scoring="neg_mean_squared_error")
        # The scorer is negated MSE, so flip the sign to get a loss.
        return {'loss': -np.mean(fold_scores), 'status': STATUS_OK}

    space_activation = ['relu', 'tanh']
    space_loss = ['mse', 'mae']
    search_space = {
        "activation": hp.choice('activation', space_activation),
        "loss": hp.choice('loss', space_loss),
        'batch_size': hp.quniform('batch_size', 32, 128, 32),
        'neurons': hp.quniform('neurons', 256, 1024, 256),
        'epochs': hp.quniform('epochs', 30, 60, 10),
        'learning_rate': hp.uniform('learning_rate', 1e-5, 1e-2)
    }
    trials_ann = Trials()
    best_ann = fmin(fn=objective,
                    space=search_space,
                    algo=tpe.suggest,
                    max_evals=10,
                    trials=trials_ann)

    best_loss = min(trials_ann.losses())
    print("ANN MSE score:%.4f" % best_loss)
    process_time_ann = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(process_time_ann))
    print("最佳超参数值集合:", best_ann)

    # The categorical entries of best_ann are used as list indices here.
    best_params_ann = {
        'activation': space_activation[best_ann['activation']],
        'loss': space_loss[best_ann['loss']],
        'batch_size': int(best_ann['batch_size']),
        'neurons': int(best_ann['neurons']),
        'epochs': int(best_ann['epochs']),
        'learning_rate': float(best_ann['learning_rate'])
    }
    model_bo_tpe_ann = ANN(**best_params_ann)
    save_model_object(model_bo_tpe_ann, 'BO-TPE', 'ANN', 'ANN')
    return best_loss, process_time_ann, best_ann
def bs_ANN(X, y):
    """Score an untuned ``KerasRegressor`` baseline built from ``ANN``.

    Args:
        X: Feature matrix forwarded to ``cross_val_score``.
        y: Regression targets forwarded to ``cross_val_score``.

    Returns:
        Tuple ``(mse, elapsed)``: the mean 3-fold cross-validated MSE and
        the elapsed ``datetime.timedelta``.
    """
    started_at = datetime.datetime.now()
    base_ann = KerasRegressor(build_fn=ANN, verbose=0)
    fold_scores = cross_val_score(base_ann,
                                  X,
                                  y,
                                  cv=3,
                                  scoring='neg_mean_squared_error')
    # The scorer is negated MSE, so flip the sign for reporting.
    base_ann_score = -fold_scores.mean()
    process_time_ann = datetime.datetime.now() - started_at

    print("ANN MSE score {}".format(str(base_ann_score)))
    print("程序执行时间(秒):{}".format(process_time_ann))
    save_model_object(base_ann, 'baseline', 'ann', 'ann')
    return base_ann_score, process_time_ann
def bs_svr(X, y):
    """Score an ``SVR`` baseline constructed with default arguments.

    Args:
        X: Feature matrix forwarded to ``cross_val_score``.
        y: Regression targets forwarded to ``cross_val_score``.

    Returns:
        Tuple ``(mse, elapsed)``: the mean 3-fold cross-validated MSE and
        the elapsed ``datetime.timedelta``.
    """
    started_at = datetime.datetime.now()
    base_svr = SVR()
    fold_scores = cross_val_score(base_svr,
                                  X,
                                  y,
                                  cv=3,
                                  scoring='neg_mean_squared_error')
    # The scorer is negated MSE, so flip the sign for reporting.
    base_svr_score = -fold_scores.mean()
    process_time_svr = datetime.datetime.now() - started_at

    print("SVR MSE score {}".format(base_svr_score))
    print("程序执行时间(秒):{}".format(process_time_svr))
    save_model_object(base_svr, 'baseline', 'svr', 'svr')
    return base_svr_score, process_time_svr
def bs_KNN(X, y):
    """Score a ``KNeighborsRegressor`` baseline with default arguments.

    Args:
        X: Feature matrix forwarded to ``cross_val_score``.
        y: Regression targets forwarded to ``cross_val_score``.

    Returns:
        Tuple ``(mse, elapsed)``: the mean 3-fold cross-validated MSE and
        the elapsed ``datetime.timedelta``.
    """
    started_at = datetime.datetime.now()
    base_knn = KNeighborsRegressor()
    fold_scores = cross_val_score(base_knn,
                                  X,
                                  y,
                                  cv=3,
                                  scoring='neg_mean_squared_error')
    # The scorer is negated MSE, so flip the sign for reporting.
    base_knn_score = -fold_scores.mean()
    process_time_knn = datetime.datetime.now() - started_at

    print("KNN MSE score {}".format(base_knn_score))
    print("程序执行时间(秒):{}".format(process_time_knn))
    save_model_object(base_knn, 'baseline', 'knn', 'knn')
    return base_knn_score, process_time_knn
def bs_random_forest_regressor(X, y):
    """Score a ``RandomForestRegressor`` baseline with default arguments.

    Args:
        X: Feature matrix forwarded to ``cross_val_score``.
        y: Regression targets forwarded to ``cross_val_score``.

    Returns:
        Tuple ``(mse, elapsed)``: the mean 3-fold cross-validated MSE and
        the elapsed ``datetime.timedelta``.
    """
    started_at = datetime.datetime.now()
    base_rf = RandomForestRegressor()
    fold_scores = cross_val_score(base_rf,
                                  X,
                                  y,
                                  cv=3,
                                  scoring='neg_mean_squared_error')
    # The scorer is negated MSE, so flip the sign for reporting.
    base_rf_score = -fold_scores.mean()
    process_time_rf = datetime.datetime.now() - started_at

    print(" RandomForestRegressor MSE score {}".format(base_rf_score))
    print("程序执行时间(秒):{}".format(process_time_rf))
    save_model_object(base_rf, 'baseline', 'randomforest', 'randomforest')
    return base_rf_score, process_time_rf
def grid_knn(X, y):
    """Tune ``KNeighborsRegressor`` with an exhaustive ``GridSearchCV``.

    Tries ``n_neighbors`` in {2, 3, 5, 7, 10} using 3-fold cross-validation
    with negated MSE as the score; the fitted search object is passed to
    ``save_model_object``.

    Args:
        X: Feature matrix passed to ``GridSearchCV.fit``.
        y: Regression targets passed to ``GridSearchCV.fit``.

    Returns:
        Tuple ``(best_mse, elapsed, best_params)`` where ``best_mse`` is a
        string and ``elapsed`` is a ``datetime.timedelta``.
    """
    param_grid = {'n_neighbors': [2, 3, 5, 7, 10]}
    started_at = datetime.datetime.now()

    # Named `search` so the local does not shadow this function's own name.
    search = GridSearchCV(KNeighborsRegressor(),
                          param_grid,
                          cv=3,
                          scoring='neg_mean_squared_error')
    search.fit(X, y)

    # best_score_ is negated MSE; flip the sign for reporting.
    print("KNN MSE score:" + str(-search.best_score_))
    process_time_knn = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(process_time_knn))
    print("最佳超参数值集合:", search.best_params_)
    save_model_object(search, 'grid_search', 'KNN', 'KNN')
    best_mse_text = str(-search.best_score_)
    return best_mse_text, process_time_knn, search.best_params_
def bo_tpe_RandomForestRegressor(X, y):
    """Tune ``RandomForestRegressor`` with hyperopt's TPE (20 evaluations).

    Each candidate is scored by the mean 3-fold cross-validated MSE; the raw
    ``fmin`` result is printed, passed to ``save_model_object`` and returned.

    Args:
        X: Feature matrix forwarded to ``cross_val_score``.
        y: Regression targets forwarded to ``cross_val_score``.

    Returns:
        Tuple ``(best_mse, elapsed, best)``: the lowest loss over all trials,
        the elapsed ``datetime.timedelta`` and the dict returned by ``fmin``.
    """
    started_at = datetime.datetime.now()
    integer_keys = ('n_estimators', 'max_depth', 'max_features',
                    'min_samples_split', 'min_samples_leaf')

    def objective(sampled):
        # Cast the numeric settings to int before building the estimator.
        forest_kwargs = {key: int(sampled[key]) for key in integer_keys}
        forest_kwargs['criterion'] = str(sampled['criterion'])
        forest = RandomForestRegressor(**forest_kwargs)
        fold_scores = cross_val_score(
            forest, X, y, cv=3, n_jobs=-1,
            scoring="neg_mean_squared_error")
        # The scorer is negated MSE, so flip the sign to get a loss.
        return {'loss': -np.mean(fold_scores), 'status': STATUS_OK}

    # Hyperparameter configuration space.
    search_space = {
        'n_estimators': hp.quniform('n_estimators', 10, 150, 1),
        'max_depth': hp.quniform('max_depth', 5, 50, 1),
        "max_features": hp.quniform('max_features', 1, 13, 1),
        "min_samples_split": hp.quniform('min_samples_split', 2, 11, 1),
        "min_samples_leaf": hp.quniform('min_samples_leaf', 1, 11, 1),
        "criterion": hp.choice('criterion', ['mse', 'mae'])
    }
    trials_rf = Trials()
    best_rf = fmin(fn=objective,
                   space=search_space,
                   algo=tpe.suggest,
                   max_evals=20,
                   trials=trials_rf)

    best_loss = min(trials_rf.losses())
    print("Random Forest MSE score:%.4f" % best_loss)
    process_time_rf = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(process_time_rf))
    print("最佳超参数值集合:", best_rf)
    save_model_object(best_rf, 'BO-TPE', 'RandomForestRegressor',
                      'RandomForestRegressor')
    return best_loss, process_time_rf, best_rf
def optuna_ANN(X, y):
    """Tune the Keras ``ANN`` builder with an Optuna study (10 trials).

    The official Optuna examples use ready-made scikit-learn models; here the
    custom network is wrapped in ``KerasRegressor`` so ``cross_val_score``
    can evaluate it. Parameters that are not listed below are left at the
    builder's defaults.

    After the study, a model is built by calling ``ANN`` with the best
    parameters and that object is passed to ``save_model_object``.

    Args:
        X: Feature matrix forwarded to ``cross_val_score``.
        y: Regression targets forwarded to ``cross_val_score``.

    Returns:
        Tuple ``(best_mse, elapsed, best_params)`` where ``elapsed`` is a
        ``"h:mm:ss.ffffff"`` string.
    """

    def objective(trial):
        params = {
            # suggest_float(log=True) replaces the deprecated
            # suggest_loguniform and samples from the same distribution.
            "learning_rate":
            trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True),
            "batch_size":
            trial.suggest_categorical("batch_size", [128, 256]),
            "activation":
            trial.suggest_categorical("activation", ['relu', 'tanh']),
            'neurons':
            trial.suggest_int("neurons", 512, 2048, step=128),
            'epochs':
            trial.suggest_int("epochs", 40, 100, step=10),
        }
        # `verbose` is a name defined outside this function.
        clf = KerasRegressor(build_fn=ANN, **params, verbose=verbose)
        fold_scores = cross_val_score(clf,
                                      X,
                                      y,
                                      cv=3,
                                      scoring='neg_mean_squared_error')
        # The scorer is negated MSE, so flip the sign to get a loss.
        return -fold_scores.mean()

    study_name_ann = 'optuna-ann'  # Unique identifier of the study.
    study_ann = optuna.create_study(direction="minimize",
                                    study_name=study_name_ann)
    study_ann.optimize(objective, n_trials=10)
    optuna_ann_mse_score = study_ann.best_value

    # NOTE(review): this is the duration of the best trial only, not of the
    # whole study -- confirm that is what callers want to compare.
    best_trial = study_ann.best_trial
    elapsed_seconds = (best_trial.datetime_complete -
                       best_trial.datetime_start).total_seconds()
    # Convert seconds to an h:mm:ss string.
    minutes, seconds = divmod(elapsed_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    optuna_ann_time = "%d:%02d:%09f" % (hours, minutes, seconds)

    print("ANN MSE score:%.4f" % optuna_ann_mse_score)
    print("程序执行时间(秒):{}".format(optuna_ann_time))
    print("最佳超参数值集合:", study_ann.best_params)
    model_optuna_ann = ANN(**study_ann.best_params)
    save_model_object(model_optuna_ann, 'Optuna', 'ANN', 'ANN')
    return optuna_ann_mse_score, optuna_ann_time, study_ann.best_params
def bo_knn(X, y):
    """Tune ``KNeighborsRegressor`` with ``BayesSearchCV`` (10 iterations).

    Searches ``n_neighbors`` over ``Integer(1, 20)`` using 3-fold
    cross-validation with negated MSE as the score; the fitted search object
    is passed to ``save_model_object``.

    Args:
        X: Feature matrix passed to ``BayesSearchCV.fit``.
        y: Regression targets passed to ``BayesSearchCV.fit``.

    Returns:
        Tuple ``(best_mse, elapsed, best_params)`` where ``best_mse`` is a
        string and ``elapsed`` is a ``datetime.timedelta``.
    """
    search_space = {
        'n_neighbors': Integer(1, 20),
    }
    started_at = datetime.datetime.now()

    Bayes_knn = BayesSearchCV(KNeighborsRegressor(),
                              search_space,
                              cv=3,
                              n_iter=10,
                              scoring='neg_mean_squared_error')
    Bayes_knn.fit(X, y)

    # best_score_ is negated MSE; flip the sign for reporting.
    print("KNN MSE score:" + str(-Bayes_knn.best_score_))
    process_time_knn = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(process_time_knn))
    print("最佳超参数值集合:", Bayes_knn.best_params_)
    save_model_object(Bayes_knn, 'BO-GP', 'KNN', 'KNN')
    best_mse_text = str(-Bayes_knn.best_score_)
    return best_mse_text, process_time_knn, Bayes_knn.best_params_
def gpminimize_knn(X, y):
    """Tune ``KNeighborsRegressor`` with ``gp_minimize`` (10 calls).

    A single estimator instance is reconfigured through ``set_params`` for
    every candidate and scored by the mean 3-fold cross-validated MSE. The
    ``models`` attribute of the optimisation result is passed to
    ``save_model_object``.

    Args:
        X: Feature matrix forwarded to ``cross_val_score``.
        y: Regression targets forwarded to ``cross_val_score``.

    Returns:
        Tuple ``(best_mse, elapsed, best_x)``: the minimum objective value,
        the elapsed ``datetime.timedelta`` and the result's ``x`` attribute.
    """
    started_at = datetime.datetime.now()
    knn = KNeighborsRegressor()
    dimensions = [Integer(1, 20, name='n_neighbors')]

    @use_named_args(dimensions)
    def objective(**params):
        knn.set_params(**params)
        fold_scores = cross_val_score(
            knn, X, y, cv=3, n_jobs=-1,
            scoring="neg_mean_squared_error")
        # The scorer is negated MSE, so flip the sign to get a loss.
        return -np.mean(fold_scores)

    res_gp_knn = gp_minimize(objective, dimensions, n_calls=10,
                             random_state=0)

    print("KNN MSE score:%.4f" % res_gp_knn.fun)
    process_time_knn = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(process_time_knn))
    print("最佳超参数值集合:", res_gp_knn.x)
    save_model_object(res_gp_knn.models, 'gp_minimize', 'KNN', 'KNN')
    return res_gp_knn.fun, process_time_knn, res_gp_knn.x
def rs_knn(X, y):
    """Tune ``KNeighborsRegressor`` with ``RandomizedSearchCV``.

    Draws 10 candidates for ``n_neighbors`` from ``sp_randint(1, 20)`` using
    3-fold cross-validation with negated MSE as the score; the fitted search
    object is passed to ``save_model_object``.

    Args:
        X: Feature matrix passed to ``RandomizedSearchCV.fit``.
        y: Regression targets passed to ``RandomizedSearchCV.fit``.

    Returns:
        Tuple ``(best_mse, elapsed, best_params)`` where ``best_mse`` is a
        string and ``elapsed`` is a ``datetime.timedelta``.
    """
    distributions = {
        'n_neighbors': sp_randint(1, 20),
    }
    candidate_count = 10
    started_at = datetime.datetime.now()

    Random_knn = RandomizedSearchCV(KNeighborsRegressor(),
                                    param_distributions=distributions,
                                    n_iter=candidate_count,
                                    cv=3,
                                    scoring='neg_mean_squared_error')
    Random_knn.fit(X, y)

    # best_score_ is negated MSE; flip the sign for reporting.
    print("KNN MSE score:" + str(-Random_knn.best_score_))
    process_time_knn = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(process_time_knn))
    print("最佳超参数值集合:", Random_knn.best_params_)
    save_model_object(Random_knn, 'random_search', 'KNN', 'KNN')
    best_mse_text = str(-Random_knn.best_score_)
    return best_mse_text, process_time_knn, Random_knn.best_params_
def grid_svr(X, y):
    """Tune ``SVR`` with an exhaustive ``GridSearchCV``.

    Uses 3-fold cross-validation with negated MSE as the score; the fitted
    search object is passed to ``save_model_object``.

    Args:
        X: Feature matrix passed to ``GridSearchCV.fit``.
        y: Regression targets passed to ``GridSearchCV.fit``.

    Returns:
        Tuple ``(best_mse, elapsed, best_params)`` where ``best_mse`` is a
        string and ``elapsed`` is a ``datetime.timedelta``.
    """
    # Hyperparameter configuration space.
    param_grid = {
        'C': [1, 10, 100],
        "kernel": ['poly', 'rbf', 'sigmoid'],
        "degree": np.arange(1, 10, 1),
        "epsilon": [0.01, 0.1, 1]
    }
    started_at = datetime.datetime.now()

    # Named `search` so the local does not shadow this function's own name.
    search = GridSearchCV(SVR(),
                          param_grid,
                          cv=3,
                          scoring='neg_mean_squared_error')
    search.fit(X, y)

    # best_score_ is negated MSE; flip the sign for reporting.
    print("SVR MSE score:" + str(-search.best_score_))
    process_time_svr = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(process_time_svr))
    print("最佳超参数值集合:", search.best_params_)
    save_model_object(search, 'grid_search', 'SVR', 'SVR')
    best_mse_text = str(-search.best_score_)
    return best_mse_text, process_time_svr, search.best_params_
def bo_svr(X, y):
    """Tune ``SVR`` with ``BayesSearchCV`` (20 iterations).

    Searches ``C``, ``kernel`` and ``epsilon`` using 3-fold cross-validation
    with negated MSE as the score; the fitted search object is passed to
    ``save_model_object``.

    Args:
        X: Feature matrix passed to ``BayesSearchCV.fit``.
        y: Regression targets passed to ``BayesSearchCV.fit``.

    Returns:
        Tuple ``(best_mse, elapsed, best_params)`` where ``best_mse`` is a
        string and ``elapsed`` is a ``datetime.timedelta``.
    """
    search_space = {
        'C': Real(1, 50),
        "kernel": ['poly', 'rbf', 'sigmoid'],
        'epsilon': Real(0, 1)
    }
    started_at = datetime.datetime.now()

    Bayes_svr = BayesSearchCV(SVR(gamma='scale'),
                              search_space,
                              cv=3,
                              n_iter=20,
                              scoring='neg_mean_squared_error')
    Bayes_svr.fit(X, y)

    # best_score_ is negated MSE; flip the sign for reporting.
    print("SVR MSE score:" + str(-Bayes_svr.best_score_))
    process_time_svr = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(process_time_svr))
    print("最佳超参数值集合:", Bayes_svr.best_params_)
    save_model_object(Bayes_svr, 'BO-GP', 'SVR', 'SVR')
    best_mse_text = str(-Bayes_svr.best_score_)
    return best_mse_text, process_time_svr, Bayes_svr.best_params_
def rs_svr(X, y):
    """Tune ``SVR`` with ``RandomizedSearchCV``.

    Draws 20 candidates for ``C``, ``kernel`` and ``epsilon`` using 3-fold
    cross-validation with negated MSE as the score; the fitted search object
    is passed to ``save_model_object``.

    Args:
        X: Feature matrix passed to ``RandomizedSearchCV.fit``.
        y: Regression targets passed to ``RandomizedSearchCV.fit``.

    Returns:
        Tuple ``(best_mse, elapsed, best_params)`` where ``best_mse`` is a
        string and ``elapsed`` is a ``datetime.timedelta``.
    """
    distributions = {
        'C': stats.uniform(0, 50),
        "kernel": ['poly', 'rbf', 'sigmoid'],
        "epsilon": stats.uniform(0, 1)
    }
    candidate_count = 20
    started_at = datetime.datetime.now()

    Random_svr = RandomizedSearchCV(SVR(gamma='scale'),
                                    param_distributions=distributions,
                                    n_iter=candidate_count,
                                    cv=3,
                                    scoring='neg_mean_squared_error')
    Random_svr.fit(X, y)

    # best_score_ is negated MSE; flip the sign for reporting.
    print("SVR MSE score:" + str(-Random_svr.best_score_))
    process_time_svr = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(process_time_svr))
    print("最佳超参数值集合:", Random_svr.best_params_)
    save_model_object(Random_svr, 'random_search', 'SVR', 'SVR')
    best_mse_text = str(-Random_svr.best_score_)
    return best_mse_text, process_time_svr, Random_svr.best_params_
def gpminimize_svr(X, y):
    """Tune ``SVR`` with ``gp_minimize`` (20 calls).

    A single estimator instance is reconfigured through ``set_params`` for
    every candidate and scored by the mean 3-fold cross-validated MSE. The
    ``models`` attribute of the optimisation result is passed to
    ``save_model_object``.

    Args:
        X: Feature matrix forwarded to ``cross_val_score``.
        y: Regression targets forwarded to ``cross_val_score``.

    Returns:
        Tuple ``(best_mse, elapsed, best_x)``: the minimum objective value,
        the elapsed ``datetime.timedelta`` and the result's ``x`` attribute.
    """
    started_at = datetime.datetime.now()
    svr = SVR(gamma='scale')
    dimensions = [
        Real(1, 50, name='C'),
        Categorical(['poly', 'rbf', 'sigmoid'], name='kernel'),
        Real(0, 1, name='epsilon'),
    ]

    @use_named_args(dimensions)
    def objective(**params):
        svr.set_params(**params)
        fold_scores = cross_val_score(
            svr, X, y, cv=3, n_jobs=-1,
            scoring="neg_mean_squared_error")
        # The scorer is negated MSE, so flip the sign to get a loss.
        return -np.mean(fold_scores)

    res_gp_svr = gp_minimize(objective, dimensions, n_calls=20,
                             random_state=0)

    print("SVR MSE score:%.4f" % res_gp_svr.fun)
    process_time_svr = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(process_time_svr))
    print("最佳超参数值集合:", res_gp_svr.x)
    save_model_object(res_gp_svr.models, 'gp_minimize', 'SVR', 'SVR')
    return res_gp_svr.fun, process_time_svr, res_gp_svr.x
def optuna_knn(X, y):
    """Tune ``KNeighborsRegressor`` with an Optuna study (5 trials).

    Searches ``n_neighbors`` in [1, 20]; each trial is scored by the mean
    3-fold cross-validated MSE. The study object is passed to
    ``save_model_object``.

    Args:
        X: Feature matrix forwarded to ``cross_val_score``.
        y: Regression targets forwarded to ``cross_val_score``.

    Returns:
        Tuple ``(best_mse, elapsed, best_params)`` where ``elapsed`` is a
        ``"h:mm:ss.ffffff"`` string built from the best trial's start and
        completion timestamps.
    """

    def objective(trial):
        n_neighbors = trial.suggest_int("n_neighbors", 1, 20, step=1)
        knn = KNeighborsRegressor(n_neighbors=n_neighbors)
        fold_scores = cross_val_score(
            knn, X, y, cv=3, scoring="neg_mean_squared_error")
        # The scorer is negated MSE, so flip the sign to get a loss.
        return -np.mean(fold_scores)

    study_knn = optuna.create_study(direction="minimize")
    study_knn.optimize(objective, n_trials=5)
    optuna_knn_mse_score = study_knn.best_value

    best_trial = study_knn.best_trial
    elapsed_seconds = (best_trial.datetime_complete -
                       best_trial.datetime_start).total_seconds()
    # Convert seconds to an h:mm:ss string.
    minutes, seconds = divmod(elapsed_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    optuna_knn_time = "%d:%02d:%09f" % (hours, minutes, seconds)

    print("KNN MSE score:%.4f" % optuna_knn_mse_score)
    print("程序执行时间(秒):{}".format(optuna_knn_time))
    print("最佳超参数值集合:", study_knn.best_params)
    save_model_object(study_knn, 'Optuna', 'KNN', 'KNN')
    return optuna_knn_mse_score, optuna_knn_time, study_knn.best_params
def optuna_GradientBoostingRegressor(X, y):
    """Tune ``GradientBoostingRegressor`` with an Optuna study (5 trials).

    Four parameters are searched: ``subsample``, ``n_estimators``,
    ``max_depth`` and the learning rate (registered under the name ``lr``).
    Each trial is scored by the mean 3-fold cross-validated MSE. The study
    object is passed to ``save_model_object``.

    Args:
        X: Feature matrix forwarded to ``cross_val_score``.
        y: Regression targets forwarded to ``cross_val_score``.

    Returns:
        Tuple ``(best_mse, elapsed, best_params)`` where ``elapsed`` is a
        ``"h:mm:ss.ffffff"`` string.
    """

    def objective(trial):
        # suggest_float(step=...) / suggest_float(log=True) replace the
        # deprecated suggest_discrete_uniform / suggest_loguniform and keep
        # the same parameter names and ranges.
        subsample = trial.suggest_float("subsample", 0.1, 1.0, step=0.1)
        n_estimators = trial.suggest_int("n_estimators", 50, 200)
        max_depth = trial.suggest_int("max_depth", 1, 20)
        lr = trial.suggest_float("lr", 1e-4, 1e-1, log=True)
        clf = GradientBoostingRegressor(n_estimators=n_estimators,
                                        subsample=subsample,
                                        learning_rate=lr,
                                        max_depth=max_depth,
                                        random_state=0)
        fold_scores = cross_val_score(
            clf, X, y, cv=3, scoring="neg_mean_squared_error")
        # The scorer is negated MSE, so flip the sign to get a loss.
        return -np.mean(fold_scores)

    study_name_gbr = 'optuna-gbr'  # Unique identifier of the study.
    study_gbr = optuna.create_study(direction="minimize",
                                    study_name=study_name_gbr)
    # A study can also be loaded from an sqlite3 database and then optimised
    # directly, e.g.:
    # study = optuna.create_study(study_name='example-study',
    #                             storage='sqlite:///example.db',
    #                             load_if_exists=True)
    study_gbr.optimize(objective, n_trials=5)
    optuna_gbr_mse_score = study_gbr.best_value

    # NOTE(review): this is the duration of the best trial only, not of the
    # whole study -- confirm that is what callers want to compare.
    best_trial = study_gbr.best_trial
    elapsed_seconds = (best_trial.datetime_complete -
                       best_trial.datetime_start).total_seconds()
    # Convert seconds to an h:mm:ss string.
    minutes, seconds = divmod(elapsed_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    optuna_gbr_time = "%d:%02d:%09f" % (hours, minutes, seconds)

    print("GradientBoostingRegressor MSE score:%.4f" % optuna_gbr_mse_score)
    print("程序执行时间(秒):{}".format(optuna_gbr_time))
    print("最佳超参数值集合:", study_gbr.best_params)
    save_model_object(study_gbr, 'Optuna', 'GradientBoostingRegressor',
                      'GradientBoostingRegressor')
    return optuna_gbr_mse_score, optuna_gbr_time, study_gbr.best_params
def grid_ANN(X, y):
    """Tune the Keras ``ANN`` builder with an exhaustive ``GridSearchCV``.

    Uses 3-fold cross-validation with negated MSE as the score. After the
    search, a model is built by calling ``ANN`` with the best parameters and
    that object is passed to ``save_model_object``.

    Args:
        X: Feature matrix passed to ``GridSearchCV.fit``.
        y: Regression targets passed to ``GridSearchCV.fit``.

    Returns:
        Tuple ``(best_mse, elapsed, best_params)`` where ``best_mse`` is a
        string and ``elapsed`` is a ``datetime.timedelta``.
    """
    # NOTE(review): 1028 may have been meant as 1024 -- confirm.
    param_grid = {
        "neurons": [512, 1028],
        "batch_size": [128],
        "epochs": [60, 80],
        "patience": [3],
        "loss": ['mse']
    }
    started_at = datetime.datetime.now()

    # `verbose` is a name defined outside this function.
    estimator = KerasRegressor(build_fn=ANN, verbose=verbose)
    grid_ann = GridSearchCV(estimator,
                            param_grid,
                            cv=3,
                            scoring='neg_mean_squared_error')
    grid_ann.fit(X, y)

    # best_score_ is negated MSE; flip the sign for reporting.
    print("ANN MSE score:" + str(-grid_ann.best_score_))
    process_time_ann = datetime.datetime.now() - started_at
    print("程序执行时间(秒):{}".format(process_time_ann))
    print("最佳超参数值集合:", grid_ann.best_params_)

    model_grid_ann = ANN(**grid_ann.best_params_)
    save_model_object(model_grid_ann, 'grid_search', 'ANN', 'ANN')
    best_mse_text = str(-grid_ann.best_score_)
    return best_mse_text, process_time_ann, grid_ann.best_params_
def bo_tpe_lightgbm(X, y):
    """Tune ``lgb.LGBMRegressor`` with hyperopt's TPE and score it on a test set.

    The data is split into train/validation/test parts. TPE runs 50
    evaluations, each scored by the mean 3-fold cross-validated MSE on the
    training part. The best configuration is then refitted on the training
    part with early stopping against the validation part and evaluated on
    the held-out test part.

    Reference: https://qiita.com/TomokIshii/items/3729c1b9c658cc48b5cb

    Args:
        X: Feature matrix passed to ``train_test_split``.
        y: Regression targets passed to ``train_test_split``.

    Returns:
        Tuple ``(test_mse, elapsed, best_params)``: the MSE on the test part,
        the elapsed ``datetime.timedelta`` (measured from after the data
        split) and the tuned hyperparameters decoded with ``space_eval``.
    """
    data = X
    target = y

    # Two successive splits yield three parts: test, train and validation.
    X_intermediate, X_test, y_intermediate, y_test = train_test_split(
        data, target, shuffle=True, test_size=0.2, random_state=1)
    X_train, X_validation, y_train, y_validation = train_test_split(
        X_intermediate,
        y_intermediate,
        shuffle=False,
        test_size=0.25,
        random_state=1)
    # Delete the intermediate variables.
    del X_intermediate, y_intermediate

    # Show the share of each part.
    print('train: {}% | validation: {}% | test {}%'.format(
        round((len(y_train) / len(target)) * 100, 2),
        round((len(y_validation) / len(target)) * 100, 2),
        round((len(y_test) / len(target)) * 100, 2)))

    starttime = datetime.datetime.now()
    space = {
        "lambda_l1": hp.uniform("lambda_l1", 1e-8, 1.0),
        "lambda_l2": hp.uniform("lambda_l2", 1e-8, 1.0),
        "min_child_samples": hp.uniformint("min_child_samples", 5, 100),
        'learning_rate': hp.uniform("learning_rate", 0.001, 0.5),
        "n_estimators": hp.uniformint("n_estimators", 10, 100),
        "num_leaves": hp.uniformint("num_leaves", 5, 35)
    }
    # Settings that are not searched; n_estimators, num_leaves and
    # learning_rate are part of the search space instead.
    default_params = {
        "random_state": 1,
        "objective": "regression",
        "boosting_type": "gbdt",
        "feature_fraction": 0.9,
        "bagging_fraction": 0.8,
        "bagging_freq": 5,
        "verbose": -1,
    }

    def objective(params):
        # Build a new dict rather than mutating the one passed in by
        # hyperopt; default_params still takes precedence on key clashes.
        model_params = {**params, **default_params}
        clf = lgb.LGBMRegressor(**model_params)
        fold_scores = cross_val_score(clf,
                                      X_train,
                                      y_train,
                                      cv=3,
                                      n_jobs=-1,
                                      scoring="neg_mean_squared_error")
        # The scorer is negated MSE, so flip the sign to get a loss.
        return {'loss': -np.mean(fold_scores), 'status': STATUS_OK}

    trials_lgb = Trials()
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # max_evals is the number of parameter sets tried: more may improve
        # accuracy but makes the search take longer.
        best = fmin(fn=objective,
                    space=space,
                    algo=tpe.suggest,
                    max_evals=50,
                    trials=trials_lgb)

    best_params = space_eval(space, best)
    # Fix: the final model used to be built from best_params alone, so it
    # dropped default_params and did not match the tuned configuration.
    final_params = {**best_params, **default_params}
    lgb_model = lgb.LGBMRegressor(**final_params).fit(
        X_train,
        y_train,
        eval_set=[(X_validation, y_validation)],
        verbose=-1,
        # With early stopping, training may end before all n_estimators
        # boosting rounds have run.
        early_stopping_rounds=2)

    y_pred = lgb_model.predict(X_test)
    # Arguments are now in the documented (y_true, y_pred) order.
    test_MSE_lgb = mean_squared_error(y_test, y_pred)
    print("LightGBM MSE score:%.4f" % test_MSE_lgb)
    endtime = datetime.datetime.now()
    process_time_lgb = endtime - starttime
    print("程序执行时间(秒):{}".format(process_time_lgb))
    print("最佳超参数值集合:", best_params)
    # Fix: the labels were copy-pasted as 'NGBoost', identical to the ones
    # the XGBoost tuner passes for its own model.
    save_model_object(lgb_model, 'BO-TPE', 'LightGBM', 'LightGBM')
    return test_MSE_lgb, process_time_lgb, best_params