def reg_fit_predict(self, x_train, x_test, y_train, y_test, est_name, report_flg=True):
    """Fit the named regressor (or a voting ensemble), predict on the test
    set, optionally report scores, and cache the fitted model.

    Parameters
    ----------
    x_train, x_test, y_train, y_test : array-like
        Train/test feature matrices and targets.
    est_name : str
        Key into ``self.base_regression_estimators``, or ``'vote'`` to
        build a VotingRegressor from every previously fitted model.
    report_flg : bool, default True
        When True, call ``self.reg_score_report`` on the predictions.
    """
    if est_name == 'vote':
        if len(self.regression_estimators) > 1:
            print(self.regression_estimators.items())
            # BUG FIX: VotingRegressor expects a list of (name, estimator)
            # tuples, not a dict view — materialize it explicitly.
            model = VotingRegressor(
                estimators=list(self.regression_estimators.items()))
        else:
            print('Caution: No models')
            return
    else:
        model = self.base_regression_estimators[est_name]
    model.fit(x_train, y_train)
    # predict test data
    y_pred = model.predict(x_test)
    # report scores
    if report_flg:
        self.reg_score_report(y_test, y_pred)
    # add model to dict (note: a fitted 'vote' ensemble is cached too, so a
    # later 'vote' call would include the previous ensemble as a member)
    self.regression_estimators[est_name] = model
    return
def full_train():
    """
    Train the ensemble model on all of the available data and save it as a
    pickle file.

    Returns
    -------
    None
        The trained model is written to ``$HOME/app/model.pickle`` for
        later usage in predictions.
    """
    # BUG FIX: call df_prep_split() once — the original invoked the
    # (potentially expensive) data-preparation routine twice.
    prepared = df_prep_split()
    X, y = prepared[0], prepared[1]
    ridge_reg = Ridge()
    forest_reg = RandomForestRegressor(
        max_features=8,
        n_estimators=100,
        n_jobs=-1
    )  # downscaled n_estimators from 500 due to memory issues on server
    boost_reg = GradientBoostingRegressor()
    ensemble_reg = VotingRegressor(
        estimators=[("ridge", ridge_reg), ("RF", forest_reg), ("GB", boost_reg)],
        n_jobs=-1)
    ensemble_reg.fit(X, y)
    PATH = os.environ.get('HOME') + "/app/model.pickle"  # CHANGE PATH TO SERVER DIR
    # BUG FIX: use a context manager so the file handle is closed even if
    # pickling fails (the original leaked the handle from open(...)).
    with open(PATH, "wb") as f:
        return pickle.dump(ensemble_reg, f)
def train(features: List[str]):
    """Train XGBoost and LightGBM regressors on a date-split training set
    and combine them in a VotingRegressor.

    Parameters
    ----------
    features : List[str]
        NOTE(review): currently unused — the input columns are hard-coded
        in ``in_cols``. Kept for interface compatibility; confirm intent.

    Returns
    -------
    VotingRegressor
        Ensemble fitted on the rows dated strictly before ``date_split``.
    """
    in_cols = [
        "climate_vs",
        "climate_def",
        "climate_vap",
        "climate_aet",
        "precipitation",
        "landcover_5",
    ]
    target_col = "burn_area"
    date_split = "2013-01-01"
    train_all = get_training_dataset()
    train_ = train_all.loc[train_all.date < date_split]
    # BUG FIX: the original used a strict '>' here, so rows whose date
    # equals date_split were silently dropped from BOTH splits.
    valid_ = train_all.loc[train_all.date >= date_split]
    X_train, y_train = train_[in_cols], train_[target_col]
    # Validation split is currently only consumed by the commented-out
    # CatBoost experiment below.
    X_valid, y_valid = valid_[in_cols], valid_[target_col]
    xgb_model = xgb.XGBRegressor(n_estimators=300, max_depth=3,
                                 colsample_bytree=0.5,
                                 objective='reg:squarederror')
    xgb_model.fit(X_train, y_train)
    # cat_model=CatBoostRegressor(iterations=300, depth=5, learning_rate=0.1, loss_function='RMSE')
    # cat_model.fit(X_train, y_train,eval_set=(X_valid, y_valid),plot=True)
    lgb_model = lgb.LGBMRegressor(n_estimators=100, max_depth=8,
                                  num_leaves=6, objective="regression")
    lgb_model.fit(X_train, y_train)
    # voting_regressor = VotingRegressor([('xgb', xgb_model), ('cat', cat_model), ('lgb', lgb_model)])
    voting_regressor = VotingRegressor([('xgb', xgb_model), ('lgb', lgb_model)])
    voting_regressor.fit(X_train, y_train)
    return voting_regressor
def model_fit_save(train_x, train_y, test_x, test_y):
    """Fit a voting ensemble (linear regression + RBF-SVR), print test-set
    error metrics, and persist the fitted model to 'model.pkl'."""
    # Assemble the ensemble members.
    linear_member = LinearRegression()
    #r2 = RandomForestRegressor(n_estimators=10, random_state=1)
    svr_member = SVR(kernel='rbf')
    ensemble = VotingRegressor([
        ('lr', linear_member),
        #('rf', r2),
        ('svr_rbf', svr_member)
    ])
    ensemble.fit(train_x, train_y)
    # Evaluate on the held-out test split.
    predictions = ensemble.predict(test_x)
    print('Mean Absolute Error:', mean_absolute_error(test_y, predictions))
    print('Mean Squared Error:', mean_squared_error(test_y, predictions))
    print('Root Mean Squared Error:', np.sqrt(mean_squared_error(test_y, predictions)))
    # Save the fitted model as a pickle in a file.
    joblib.dump(ensemble, 'model.pkl')
def ensemble_lgb_regressor(self):
    """Build a VotingRegressor from previously saved models ('SG-FL' and
    'MSE' variants), fit it on the training data, and pickle the test-set
    predictions under a directory named after the members.

    Exits with the error message on any failure.
    """
    try:
        root_dir = ('/Users/lujingze/Programming/SWFusion/'
                    'regression/tc/lightgbm/model/')
        model_dir = {
            'SG-FL': (f"""{root_dir}na_101.845662_fl_smogn_"""
                      f"""final_threshold_square_2/"""),
            'MSE': f'{root_dir}na_2.188733/',
        }
        er_name = ''
        estimators = []
        for name, out_dir in model_dir.items():
            er_name += f'{name}_'
            # Exactly one saved Bunch per model directory is expected.
            save_file = [f for f in os.listdir(out_dir)
                         if f.endswith('.pkl')
                         and f.startswith(f'{self.basin}')]
            if len(save_file) != 1:
                self.logger.error('Count of Bunch is not ONE')
                exit(1)
            with open(f'{out_dir}{save_file[0]}', 'rb') as f:
                best_result = pickle.load(f)
            estimators.append((name, best_result.model))
        # Strip the single trailing underscore exactly once.
        er_name = er_name[:-1]
        er = VotingRegressor(estimators)
        er.fit(self.X_train, self.y_train)
        # BUG FIX: the original stripped er_name a second time when building
        # the directory, referenced an undefined `er_dir`, and called
        # .to_pickle() on a numpy array (no such method). Build the
        # directory once and dump the predictions with pickle.
        er_dir = f'{root_dir}{er_name}/'
        os.makedirs(er_dir, exist_ok=True)
        y_pred = er.predict(self.X_test)
        with open(f'{er_dir}y_pred.pkl', 'wb') as f:
            pickle.dump(y_pred, f)
    except Exception as msg:
        # (Removed a leftover breakpoint() debugging call.)
        exit(msg)
def main():
    """Demo: fit three regressors on the diabetes dataset and combine them
    in a VotingRegressor."""
    print(__doc__)
    from sklearn.datasets import load_diabetes
    from sklearn.ensemble import GradientBoostingRegressor
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.linear_model import LinearRegression
    from sklearn.ensemble import VotingRegressor
    # Removed dead code: the original memory-mapped two .npy path files
    # (../test_paths_1.npy, ../val_paths_1.npy) that were never used and
    # would crash when the files are absent; the unused matplotlib and
    # make_regression imports were dropped with them.
    X, y = load_diabetes(return_X_y=True)
    # Train classifiers
    reg1 = GradientBoostingRegressor(random_state=1)
    reg2 = RandomForestRegressor(random_state=1)
    reg3 = LinearRegression()
    reg1.fit(X, y)
    reg2.fit(X, y)
    reg3.fit(X, y)
    ereg = VotingRegressor([('gb', reg1), ('rf', reg2), ('lr', reg3)])
    ereg.fit(X, y)
def voting():
    """Grid-search a decision tree and a random forest, combine the two
    searches in a weighted VotingRegressor, and print the test score."""
    # Decision-tree model: grid over criterion and depth.
    dtr_grid = [{
        'criterion': ['mse', 'mae'],
        'max_depth': np.arange(1, 10)
    }]
    dtr = GridSearchCV(DecisionTreeRegressor(), dtr_grid, cv=5)
    # Random-forest model: grid over split size and tree count.
    rfr_grid = {
        'min_samples_split': [3, 6, 9],
        'n_estimators': [10, 50, 100]
    }
    rfr = GridSearchCV(RandomForestRegressor(), param_grid=rfr_grid, cv=5)
    # Build the voting model, weighting the forest twice as heavily.
    voting_reg = VotingRegressor(
        estimators=[('dtr_reg', dtr), ('rfr_reg', rfr)],
        weights=[1, 2])
    # Fit the model using the (module-level) training data.
    voting_reg.fit(X_train, Y_train)
    # Score on the test data (R^2 for regressors, despite the message text).
    train_score = voting_reg.score(X_test, Y_test)
    print("Accuracy score for final voting= " + str(round(train_score, 4)))
def voting_regressor(self):
    """Fit a GBR/RF/LR voting ensemble on the training split, predict the
    test split, and persist both the model (joblib) and the predictions
    (pickle) under a directory named after the member regressors."""
    estimators_num = 10
    regs = {
        'GBR': GradientBoostingRegressor(
            random_state=1, n_estimators=estimators_num),
        'RF': RandomForestRegressor(
            random_state=1, n_estimators=estimators_num, n_jobs=-1),
        'LR': LinearRegression(),
    }
    # Idiom fix: the original enumerated without using the index and built
    # the name by appending then stripping a trailing underscore.
    ereg_estimators = list(regs.items())
    ereg_name = '_'.join(regs)  # e.g. 'GBR_RF_LR'
    ereg = VotingRegressor(estimators=ereg_estimators, n_jobs=-1)
    ereg.fit(self.X_train, self.y_train)
    y_pred = ereg.predict(self.X_test)
    root_dir = ('/Users/lujingze/Programming/SWFusion/'
                'regression/tc/lightgbm/model/')
    ereg_dir = f'{root_dir}{ereg_name}/'
    os.makedirs(ereg_dir, exist_ok=True)
    dump(ereg, f'{ereg_dir}voting_model.joblib')
    with open(f'{ereg_dir}test_pred.pkl', 'wb') as f:
        pickle.dump(y_pred, f)
def rainfall_runoff(precip_file, delineated_file, discharge_file, plot_fname):
    """Fit a bagging + random-forest voting regressor to basin
    rainfall/runoff data and save a residual scatter plot.

    Precipitation and delineated-watershed inputs must be .mat files; the
    discharge data is an .xls workbook. The plot goes to plot_fname.
    """
    # Load the MATLAB inputs.
    precip_mat = loadmat(precip_file)['basin_daily_precipitation']
    basin_mat_delineated = loadmat(delineated_file)['basin_mat_delineated']
    # Parse the first sheet of the discharge workbook; replace NaNs with 0's.
    workbook = pd.ExcelFile(discharge_file)
    discharge_df = workbook.parse(0).fillna(0)
    basin_num = 5
    forest = RandomForestRegressor(n_estimators=100, random_state=42)
    bagger = BaggingRegressor(n_estimators=100, random_state=50)
    voting_reg = VotingRegressor([('br', bagger), ('rf', forest)])
    X, y = get_data(discharge_df, basin_num, precip_mat, basin_mat_delineated, False)
    voting_reg.fit(X, y)
    y_pred = voting_reg.predict(X)
    # Residual plot: prediction vs. prediction error.
    plt.scatter(y_pred, y_pred - y, c='r')
    plt.title("Runoff prediction data using a voting-regressor")
    plt.xlabel("Predicted Output")
    plt.ylabel("Error in prediction")
    print(plot_fname)
    plt.savefig(plot_fname)
def test_weights_regressor():
    """Check that the weighted ensemble prediction matches a manually
    computed weighted average, and that weights=None equals equal weights."""
    mean_reg = DummyRegressor(strategy='mean')
    median_reg = DummyRegressor(strategy='median')
    quantile_reg = DummyRegressor(strategy='quantile', quantile=.2)
    weights = [1, 2, 10]
    ereg = VotingRegressor(
        [('mean', mean_reg), ('median', median_reg), ('quantile', quantile_reg)],
        weights=weights)
    X_r_train, X_r_test, y_r_train, y_r_test = train_test_split(
        X_r, y_r, test_size=.25)
    # Fit each member on its own and collect its predictions.
    member_preds = [
        reg.fit(X_r_train, y_r_train).predict(X_r_test)
        for reg in (mean_reg, median_reg, quantile_reg)
    ]
    ereg_pred = ereg.fit(X_r_train, y_r_train).predict(X_r_test)
    # The ensemble must equal the weighted average of its members.
    avg = np.average(np.asarray(member_preds), axis=0, weights=weights)
    assert_almost_equal(ereg_pred, avg, decimal=2)
    # weights=None must behave exactly like uniform weights.
    ereg_weights_none = VotingRegressor(
        [('mean', mean_reg), ('median', median_reg), ('quantile', quantile_reg)],
        weights=None)
    ereg_weights_equal = VotingRegressor(
        [('mean', mean_reg), ('median', median_reg), ('quantile', quantile_reg)],
        weights=[1, 1, 1])
    ereg_weights_none.fit(X_r_train, y_r_train)
    ereg_weights_equal.fit(X_r_train, y_r_train)
    ereg_none_pred = ereg_weights_none.predict(X_r_test)
    ereg_equal_pred = ereg_weights_equal.predict(X_r_test)
    assert_almost_equal(ereg_none_pred, ereg_equal_pred, decimal=2)
def train(self):
    """Train a KNN / random-forest / AdaBoost voting regressor on generated
    gripper-jack data, report scores, pickle the fitted model, and return
    the MAE as a percentage of the target range.

    Returns
    -------
    float
        MAE normalized by the target range, times 100.
    """
    self.gripperjack = self.gripperjack[0]
    self.location = self.location[0]
    generator = pg.generator_factory(self.type)
    self.df: pd.DataFrame = generator.generate(self.gripperjack, self.location, 1)
    print(self.df.columns)
    self.df = self.df.drop(columns=['Timestamp']).dropna()
    print('DATAFRAME IS LOADED IN')
    # (Removed seven redundant `= None` pre-assignments — every name below
    # is assigned before use.)
    y = self.df.pop('next')
    x = self.df
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, shuffle=True)
    # Ensemble members; the random forest dominates via the weights below.
    r = [('K Neighbour Regressor', KNeighborsRegressor(n_neighbors=15, n_jobs=5, leaf_size=50)),
         ('Random Forrest Regressor', RandomForestRegressor(n_estimators=200, n_jobs=5)),
         ('Ada Regressor', AdaBoostRegressor(n_estimators=100, learning_rate=0.1))]
    regressor = VotingRegressor(r, weights=[0.1, 1, 0.1])
    regressor.fit(x_train, y_train)
    print('===================')
    print('SCORE X/Y TEST')
    print(regressor.score(x_test, y_test))
    dump_location = 'Recources\\regressor_dumps\\' + self.type + '\\' + str(
        self.gripperjack) + '\\' + self.location
    print('==================')
    print('ACCURACY')
    y_pred = regressor.predict(x_test)
    mae = metrics.mean_absolute_error(y_test, y_pred)
    # NOTE(review): this is MAE as a percentage of the target range
    # (normalized MAE), not the conventional MAPE formula.
    mape = (mae / (y.max() - y.min())) * 100
    print('MAE')
    print(mae)
    print('MAPE')
    print(mape)
    # BUG FIX: exist_ok=True avoids the exists()/makedirs() race, and the
    # pickle file handle is now closed via a context manager (the original
    # leaked it).
    os.makedirs(dump_location, exist_ok=True)
    with open(dump_location + '\\regressor.sav', 'wb') as f:
        pickle.dump(regressor, f)
    return mape
def voting_predictions(data, base_models, val=True):
    """Fit a VotingRegressor built from (model_class, params) pairs and
    return test-set predictions transformed back from log space.

    Parameters
    ----------
    data : tuple
        (Xtrain, Xtest, y); y is assumed log-transformed, hence np.exp.
    base_models : iterable of (model_class, params_dict)
        Each entry is instantiated as ``model_class(**params_dict)``.
    val : bool, default True
        When True, also run k-fold cross-validation on the ensemble.

    Returns
    -------
    ndarray
        Predictions on Xtest, exponentiated exactly once.
    """
    data = data_copy(data)
    Xtrain, Xtest, y = data
    # Build ('model0', instance), ('model1', instance), ... pairs.
    vote_params = [
        ('model' + str(i), model_cls(**params))
        for i, (model_cls, params) in enumerate(base_models)
    ]
    votemodel = VotingRegressor(vote_params)
    votemodel.fit(Xtrain, y)
    y_pred = votemodel.predict(Xtest)
    if val:
        k_fold_crossval(data, model=votemodel)
    # BUG FIX: the original exponentiated y_pred BEFORE the `val` branch and
    # again inside it, returning exp(exp(pred)) whenever val=True. Apply the
    # inverse-log transform exactly once.
    y_pred = np.exp(y_pred)
    return y_pred
def voting_compile_fit(self):
    """Fit ``self.n_repetition`` VotingRegressors on the training split and
    keep the one with the lowest test-set MSE in ``self.voting_reg``.

    Each attempt's MSE (scaled by 1e6) is printed as it is evaluated.
    """
    # The original duplicated the fit/predict/score logic across an
    # `i == 0` branch and an `else` branch; a single loop body with a
    # "best so far" sentinel is behaviorally equivalent.
    best_mse = None
    for i in range(self.n_repetition):
        candidate = VotingRegressor(estimators=self.reg_models)
        candidate.fit(self.X_train, self.y_train.values.ravel())
        y_pred = candidate.predict(self.X_test)
        mse = mean_squared_error(self.y_test, y_pred)
        print(i + 1, ". ", "Voting_reg", mse / 1000000)
        if best_mse is None or mse < best_mse:
            self.voting_reg = candidate
            best_mse = mse
def make_voting_regressor(y, x_vars):
    """Fit a five-member voting regressor (MLP, random forest, KNN,
    decision tree, gradient boosting) on the given targets and features.

    Returns the fitted ensemble and an empty metadata dict.
    """
    members = [
        ("mlp", MLPRegressor(random_state=1, max_iter=250)),
        ("random_forest", RandomForestRegressor(n_jobs=1)),
        ("nearest_neighbor", KNeighborsRegressor(n_neighbors=4)),
        ("decision_tree", DecisionTreeRegressor(random_state=0)),
        ("gradient_boost", GradientBoostingRegressor(random_state=0)),
    ]
    ensemble = VotingRegressor(estimators=members)
    ensemble.fit(x_vars, y)
    return ensemble, {}
def run_ensemble_run(self, model_name = 'Ensemble'):
    """Fit a (nominally) ensemble model on the training split and report
    regression metrics on both the train and test splits.

    NOTE(review): reg1 (SVR) and reg2 (KNN) are constructed but never added
    to the VotingRegressor — only the random forest (reg3) is used, so the
    "ensemble" currently has a single member. Confirm whether ('SVR', reg1)
    and ('KNN', reg2) were meant to be included.
    """
    reg1 = SVR(C=10, kernel= "rbf", epsilon = 0.1, gamma = 'auto')
    reg2 = KNeighborsRegressor(n_neighbors = 11)
    reg3 = RandomForestRegressor(n_estimators = 100)
    model = VotingRegressor([('RF', reg3)])
    model.fit(self.X_train, self.Y_train)
    # Evaluate on the training data ('-OnTrain') and the test data ('-OnTest').
    self.evaluate_regression(self.Y_train, model.predict(self.X_train), self.dates_train, model_name+'-OnTrain', slicer = 1)
    self.evaluate_regression(self.Y_test, model.predict(self.X_test), self.dates_test, model_name+'-OnTest', slicer = 1)
def train_voting_regressor(algos):
    """Fit a VotingRegressor over the supplied (name, estimator) pairs and
    score it on the module-level hold-out set.

    Returns (fitted_regressor, r2, mae).
    """
    ensemble = VotingRegressor(algos)
    ensemble.fit(X_train, y_train)
    predictions = ensemble.predict(X_test1)
    return (
        ensemble,
        r2_score(y_test1, predictions),
        mean_absolute_error(y_test1, predictions),
    )
def get_training_goals(X, y, X_test):
    """Ensemble learning: combine three degree-3 polynomial models in a
    weighted VotingRegressor and predict the targets for X_test."""
    members = [
        ('rf_ploy', forest_polynomialregression(degree=3)),
        ('gb_ploy', gb_polynomialregression(degree=3)),
        ('ls_ploy', ls_polynomialregression(degree=3)),
        # ('rf_reg', RandomForestRegressor(n_estimators=100, oob_score=True, random_state=500)),
        # ('gb_reg', GradientBoostingRegressor(loss='ls', max_depth=3, max_leaf_nodes=10, min_samples_leaf=1, n_estimators=200, random_state=100)),
        # ('ls_reg', LassoCV(eps=1e-3, cv=4, max_iter=5000, random_state=100))
    ]
    # The gradient-boosted polynomial model carries most of the weight.
    ensemble = VotingRegressor(estimators=members, weights=[0.2, 0.6, 0.2])
    ensemble.fit(X, y)
    return ensemble.predict(X_test)
def ensemble_of_best_params_xgb_reg(self, fn_name, space, algo, max_evals):
    """Build a voting ensemble of XGBoost regressors — one per set of best
    parameters found by hyperparameter search — fit it, and return it.

    Parameters
    ----------
    fn_name, space, algo, max_evals
        Forwarded to ``self.params_to_ensemble`` (hyperopt-style search).

    Returns
    -------
    (VotingRegressor, list)
        The fitted ensemble and the list of best-parameter dicts.
    """
    best_params = self.params_to_ensemble(fn_name, space, algo, max_evals)
    # Idiom fix: enumerate instead of range(len(...)), and the throwaway
    # intermediate dict is gone — member names are '0', '1', ... as before.
    estimators = [
        (str(i), xgb.XGBRegressor(**params))
        for i, params in enumerate(best_params)
    ]
    model_ensemble = VotingRegressor(estimators)
    model_ensemble.fit(self.data, self.labels)
    return model_ensemble, best_params
def vote(self, model_path=None, dataset_number=1):
    """Combine three previously trained regressors (HGBR, RFR, MLPR) into a
    weighted VotingRegressor, refit it on the training data, save it to
    model_path, and evaluate it."""
    # Load the pre-trained member models for the requested dataset.
    hgbr = load(r'sklearn_models7/HGBR1_DS{0}.joblib'.format(dataset_number))
    rfr = load(r'sklearn_models7/RFR1_DS{0}.joblib'.format(dataset_number))
    mlpr = load(r'sklearn_models7/MLPR1_DS{0}.joblib'.format(dataset_number))
    # reg4 = load(r'sklearn_models7/SGDR1_DS1.joblib')
    members = [
        ('HGBR1_DS{0}'.format(dataset_number), hgbr),
        ('RFR1_DS{0}'.format(dataset_number), rfr),
        ('MLPR1_DS{0}'.format(dataset_number), mlpr),
    ]
    # Weights favour the histogram gradient booster 3:2:1.
    ereg = VotingRegressor(members, weights=[3. / 6., 2. / 6., 1. / 6.])
    ereg.fit(self.X_train, self.y_train)
    dump(ereg, model_path)
    self.evaluate_model(model=ereg, model_path=model_path)
def steam_voting_predict_learned(data):
    """
    Runs the voting model with the values to predict already being in the
    model (i.e. evaluates on the training data itself).

    Returns
    -------
    float
        Mean squared error of the predictions on the training data.
    """
    pre_learned_train = data[["positive_ratings_", "negative_ratings_", "owners_",
                              "average_playtime_", "median_playtime_"]]
    pre_learned_label = data[["price_"]]
    gradient_boosting_model = GradientBoostingRegressor(random_state=1, n_estimators=20)
    random_forest_model = RandomForestRegressor(random_state=1, n_estimators=20)
    linear_regression_model = linear_model.LinearRegression()
    voting_model = VotingRegressor(estimators=[('gb', gradient_boosting_model),
                                               ('rf', random_forest_model),
                                               ('lr', linear_regression_model)])
    voting_model.fit(pre_learned_train, pre_learned_label.values.ravel())
    preds = voting_model.predict(pre_learned_train)
    # Fix: mean_squared_error already returns a scalar; the original wrapped
    # it in a redundant np.mean().
    return mean_squared_error(pre_learned_label, preds)
def trail_main():
    """Train every model from ``get_models()`` on each training dataframe,
    print per-model test MSE/score, then fit a VotingRegressor over all of
    that dataframe's fitted models and print its test metrics.
    """
    # NOTE(review): n_folds and scores_dict are defined but never used here
    # — presumably leftovers from a cross-validation variant; verify.
    n_folds = 10
    train_path = 'data/assign3_students_train.txt'
    test_path = 'data/assign3_students_test.txt'
    train_data = read_process_data(train_path)
    test_data = read_process_data(test_path)
    models_dict = get_models()
    scores_dict = {}
    learned_models_dict = {}
    for df_key, df_val in train_data.items():
        # Split on the 15 best features (see get_final_score_tts).
        X_train, X_test, y_train, y_test = get_final_score_tts(
            df_val.copy(), test_data[df_key].copy(), n_best=15)
        voting_list = []
        for model_key, model_val in models_dict.items():
            model = model_val.fit(X_train, y_train)
            name = f'{df_key}_{model_key}'
            learned_models_dict[name] = model
            voting_list.append((name, model))
            # print(f"{name}, Train MSE ", mean_squared_error(y_train, model.predict(X_train)))
            # print(f"{name}, Train RScore ", r2_score(y_train, model.predict(X_train)))
            # print(f"{name}, Test RScore ", r2_score(y_test, model.predict(X_test)))
            print(f"X_test: {X_test.shape}, y_test: {y_test.shape}")
            print(f"{name}, Test MSE ", mean_squared_error(y_test, model.predict(X_test)))
            print(f"{name}, Test Score", model.score(X_test, y_test))
            print('=' * 75, '\n')
        # Ensemble of every model fitted for this dataframe.
        model = VotingRegressor(voting_list)
        model = model.fit(X_train, y_train)
        print('=' * 75, '\n')
        print(f"{df_key}, Voting Test MSE = ", mean_squared_error(y_test, model.predict(X_test)))
        print(f"{df_key}, Voting Test Score", model.score(X_test, y_test))
        print('=' * 75, '\n\n')
def test_get_features_names_out_regressor():
    """Check get_feature_names_out output for regressor."""
    X = [[1, 2], [3, 4], [5, 6]]
    y = [0, 1, 2]
    # 'drop' members must be excluded from the generated feature names.
    members = [
        ("lr", LinearRegression()),
        ("tree", DecisionTreeRegressor(random_state=0)),
        ("ignore", "drop"),
    ]
    voting = VotingRegressor(estimators=members)
    voting.fit(X, y)
    expected_names = ["votingregressor_lr", "votingregressor_tree"]
    assert_array_equal(voting.get_feature_names_out(), expected_names)
def run_regressors():
    """Plot the actual test series, then train and score a suite of
    regressors on the module-level normalized train/test data: first a
    voting ensemble, then individual models via ``execute_model``.
    """
    # Show the actual USD/TRY test series before any predictions.
    pyplot.plot(y_test, label='Actual')
    pyplot.legend()
    pyplot.xlabel('Time')
    pyplot.ylabel('USD/TRY')
    pyplot.show()
    # Voting Regressor
    reg1 = GradientBoostingRegressor(random_state=1, n_estimators=10)
    reg2 = RandomForestRegressor(random_state=1, n_estimators=10)
    reg3 = LinearRegression()
    model = VotingRegressor(estimators=[('gb', reg1), ('rf', reg2), ('lr', reg3)])
    model = model.fit(normalized_train_x, numpy.ravel(y_train))
    train_predict, test_predict = make_predictions(model, normalized_train_x, normalized_test_x)
    score_regressions('Voting Regressor', y_train, train_predict, y_test, test_predict)
    # score_classifications('Voting Regressor', y_train, train_predict, y_test, test_predict)
    # plot_graph(y_test, test_predict, 'Voting Regressor')
    voting = test_predict  # NOTE(review): assigned but never used below — verify intent
    # Train each remaining model with its (module-level) parameter grid.
    xgb = execute_model('Extreme Gradient Boost Regressor', {}, XGBRegressor)
    linearRegression = execute_model('Linear Regression Regressor', linearRegressionParameters, LinearRegression)
    ridge = execute_model('Ridge Regressor', ridgeParameters, Ridge)
    bayesianRidge = execute_model('Bayesian Ridge Regressor', bayesianRidgeParameters, BayesianRidge)
    lasso = execute_model('Lasso Regressor', lassoParameters, Lasso)
    lassoLars = execute_model('Lasso Lars Regressor', lassoLarsParameters, LassoLars)
    tweedie = execute_model('Tweedie Regressor', tweedieParameters, TweedieRegressor)
    svr = execute_model('SVR Regressor', svrParameters, SVR)
    sgd = execute_model('SGD Regressor', sgdParameters, SGDRegressor)
    kNeighbors = execute_model('K Neighbors Regressor', kNeighborsParameters, KNeighborsRegressor)
    gaussian = execute_model('Gaussian Process Regressor', gaussianProcessorParameters, GaussianProcessRegressor)
    mlp = execute_model('MLP Regressor ( FeedForward ANN )', mlpParameters, MLPRegressor)
def vote_prediction(X_train, X_test, y_train, y_test, alpha, l1_ratio, n_estimators, max_depth, c, gamma):
    """Fit a random-forest + RBF-SVR voting ensemble and evaluate it on the
    test split (metrics/predictions are emitted by ``evaluate``).

    The alpha/l1_ratio parameters belong to the commented-out ElasticNet
    member and are currently unused.
    """
    print("******************* VOTING ******************", end="\n\n")
    rf_member = RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth)
    svr_member = SVR(kernel='rbf', C=c, gamma=gamma)
    ensemble = VotingRegressor(estimators=[
        ('rf', rf_member),
        ('svr', svr_member),
    ])
    ensemble = ensemble.fit(X_train, y_train)
    predictions = ensemble.predict(X_test)
    evaluate('Voting', y_test, predictions, write_predictions=True)
    print("\n*********************************************", end="\n\n")
def voting_regressor_ensemble_3(self):
    """Two-member voting ensemble (linear regression + random forest):
    fit on the training split and return test-split predictions."""
    linear_model_, _ = self.linear_regr()
    forest_model, _ = self.random_forest_regr()
    ensemble = VotingRegressor(
        [('lr', linear_model_), ('rf', forest_model)],
        n_jobs=-1)
    ensemble.fit(self.x_train, self.y_train)
    return ensemble.predict(self.x_test)
def fit_best_model(
    feature_mapping=features_mapping_dict,
    best_params=best_params,
    save_to_disk=True
):
    """Fit a four-member voting ensemble (GB, RF, ElasticNet, SVR — each
    behind the shared preprocessing pipeline) on the competition training
    data and optionally pickle it.

    Parameters
    ----------
    feature_mapping : dict
        Feature mapping passed to ``preprocessing``. BUG FIX: the original
        accepted this parameter but ignored it, always using the global
        ``features_mapping_dict`` instead.
    best_params : dict
        Per-model keyword arguments under keys 'svr', 'rf', 'gb',
        'elastic_net'.
    save_to_disk : bool, default True
        BUG FIX: previously ignored — the model was saved unconditionally.

    Returns
    -------
    VotingRegressor
        The fitted ensemble (also pickled when save_to_disk is True).
    """
    # load data
    data = np.load("linear_regression_competition.train.npz")
    features, targets = data["data"], data["target"]

    def _pipeline(estimator):
        # Every member shares the same preprocessing front-end; this helper
        # replaces four copy-pasted Pipeline definitions.
        return Pipeline(steps=[
            ("preprocessing", preprocessing(feature_mapping)),
            ("estimator", estimator),
        ])

    voter = VotingRegressor(
        estimators=[
            ("gb", _pipeline(GradientBoostingRegressor(**best_params["gb"]))),
            ("rf", _pipeline(RandomForestRegressor(**best_params["rf"]))),
            ("lr", _pipeline(ElasticNet(**best_params["elastic_net"]))),
            ("svr", _pipeline(SVR(**best_params["svr"]))),
        ]
    )
    voter.fit(features, targets)
    if save_to_disk:
        with open("linear_regression_competition.model", "wb") as model_file:
            pickle.dump(voter, model_file)
    return voter
def steam_best_model_test(data):
    """
    Fits the best model with 90% of our data then predicts on the remaining
    10%. This simulates a "Real world situation".

    Returns
    -------
    float
        Mean squared error on the 10% hold-out split.
    """
    best_train = data[["positive_ratings_", "negative_ratings_", "owners_",
                       "average_playtime_", "median_playtime_"]]
    best_label = data[["price_"]]
    X_train, X_test, y_train, y_test = train_test_split(
        best_train, best_label, test_size=0.1, random_state=2)
    gradient_boosting_model = GradientBoostingRegressor(random_state=1, n_estimators=20)
    random_forest_model = RandomForestRegressor(random_state=1, n_estimators=20)
    linear_regression_model = linear_model.LinearRegression()
    voting_model = VotingRegressor(estimators=[('gb', gradient_boosting_model),
                                               ('rf', random_forest_model),
                                               ('lr', linear_regression_model)])
    voting_model.fit(X_train, y_train.values.ravel())
    preds = voting_model.predict(X_test)
    # Fix: mean_squared_error already returns a scalar; the original wrapped
    # it in a redundant np.mean().
    return mean_squared_error(y_test, preds)
def voting_regressor_ensemble_1(self):
    """Three-member voting ensemble (linear, lasso, random forest): fit on
    the training split and return test-split predictions."""
    linear_model_, _ = self.linear_regr()
    lasso_model, _ = self.lasso_regr()
    forest_model, _ = self.random_forest_regr()
    members = [('lr', linear_model_), ('lasso', lasso_model), ("rf", forest_model)]
    ensemble = VotingRegressor(members, n_jobs=-1)
    ensemble.fit(self.x_train, self.y_train)
    return ensemble.predict(self.x_test)
def get_flow(precip_file, delineated_file, discharge_file, D, T, file_name_b4_reg, file_name_after_reg):
    """Plot D-day/T-year (DQT) discharge curves before and after replacing
    observed discharge with voting-regressor predictions.

    Parameters
    ----------
    precip_file, delineated_file : str
        .mat files with basin precipitation / delineated-watershed data.
    discharge_file : str
        .xls workbook of observed discharge.
    D, T : int-like
        Number of days / years for the DQT computation (user inputs).
    file_name_b4_reg, file_name_after_reg : str
        Output plot filenames (before / after regression).
    """
    # give precipitation data and delineated watershed data as input
    # inputs should be .mat only
    precip_mat = loadmat(precip_file)['basin_daily_precipitation']
    basin_mat_delineated = loadmat(delineated_file)['basin_mat_delineated']
    print(basin_mat_delineated.shape)
    # read discharge data as .xls input
    discharge_df = pd.ExcelFile(discharge_file)
    discharge_df = discharge_df.parse(0)
    discharge_df = discharge_df.fillna(0)  # Replace the nan values with 0's
    # Tag every row with its calendar year for the per-year DQT grouping.
    all_datetimes = discharge_df['Date']
    all_years = list(map(lambda datetime_obj: int(datetime_obj.date().strftime("%Y")), all_datetimes))
    # NOTE(review): list(set(...)) has arbitrary order, yet years_list[0]
    # and years_list[-1] are used below as min/max year — confirm this
    # relies on CPython's int-set iteration happening to be sorted.
    years_list = list(set(all_years))
    discharge_df["Year"] = all_years
    # num days is D and num_years is T in the DQT format
    # D,T are USER INPUTS
    num_days = int(D)
    num_years = int(T)
    # Plot the DQT curve from the observed discharge (flag 0 = "before").
    gather_dqt_plot(0, discharge_df, years_list, num_days, num_years, file_name_b4_reg)
    basin_num = 5
    reg1 = RandomForestRegressor(n_estimators=100, random_state=42)
    reg4 = BaggingRegressor(n_estimators=100, random_state=50)
    voting_reg = VotingRegressor([('br', reg4), ('rf', reg1)])
    X, y = get_data(discharge_df, basin_num, precip_mat, basin_mat_delineated, False)
    voting_reg.fit(X, y)
    # Restrict to the observed year range, then predict discharge.
    new_discharge_df = deepcopy(discharge_df)
    new_discharge_df = new_discharge_df[(new_discharge_df["Year"] >= years_list[0]) & (new_discharge_df["Year"] <= years_list[-1])]
    print(len(discharge_df['Year']), len(new_discharge_df["Year"]))
    X, y = get_data(new_discharge_df, basin_num, precip_mat, basin_mat_delineated, True)
    y_pred = voting_reg.predict(X)
    new_discharge_df["New_Discharge"] = y_pred
    # Plot the DQT curve from the predicted discharge (flag 1 = "after").
    gather_dqt_plot(1, new_discharge_df, years_list, num_days, num_years, file_name_after_reg)
def test_onnxt_iris_voting_regressor(self):
    """Round-trip a VotingRegressor through ONNX and compare the runtime's
    predictions with the sklearn originals on a small iris subset."""
    iris = load_iris()
    X, y = iris.data, iris.target
    y = y.astype(numpy.float32)
    X_train, X_test, y_train, __ = train_test_split(X, y, random_state=11)
    voter = VotingRegressor(estimators=[
        ('lr', LinearRegression()),
        ('dt', DecisionTreeRegressor(max_depth=2))])
    voter.fit(X_train, y_train)
    # Keep a small fixed slice: the first and last four test rows.
    X_test = X_test.astype(numpy.float32)
    X_test = numpy.vstack([X_test[:4], X_test[-4:]])
    expected = voter.predict(X_test).astype(numpy.float32)
    # Convert to ONNX and run it with the pure-python runtime.
    model_def = to_onnx(voter, X_train.astype(numpy.float32))
    oinf = OnnxInference(model_def, runtime='python')
    onnx_out = oinf.run({'X': X_test})
    regs = DataFrame(onnx_out['variable']).values
    self.assertEqualArray(expected, regs.ravel(), decimal=6)