Example #1
    def optimize(self):
        """Set up and run Bayesian Optimization on the BRNN using GPy

        Returns
        -------
        list
                The best hyperparameters chosen by Bayesian Optimization, returned
                in the order [lr, nl, hs].
        """

        # Initial hyperparameter search -- used to get noise estimate
        x_init = np.array([[-3.0, 1, 20], [-3.0, 2, 20], [-3.0, 3, 20],
                           [-3.0, 4, 20], [-3.0, 5, 20], [-2.0, 2, 20],
                           [-3.3, 2, 20], [-4.0, 2, 20], [-5.0, 2, 20],
                           [-3.0, 2, 5], [-3.0, 2, 15], [-3.0, 2, 35],
                           [-3.0, 2, 50]])
        y_init, noise = self.initial_search(x_init)

        if not self.silent:
            print("\nInitial search results:")
            print("lr\tnl\ths\toutput")
            for i in range(len(x_init)):
                print("%.5f\t%2d\t%2d\t%.4f" % (10**x_init[i][0], x_init[i][1],
                                                x_init[i][2], y_init[i][0]))
            print("Noise estimate:", noise)
            print('\n')
            print('Primary optimization:')
            print('--------------------\n')
            print(
                'Learning rate   |   n_layers   |   hidden vector size |  avg CV loss  '
            )
            print(
                '======================================================================'
            )

        optimizer = BayesianOptimization(f=self.compute_cv_loss,
                                         domain=self.bds,
                                         model_type='GP',
                                         acquisition_type='EI',
                                         acquisition_jitter=0.05,
                                         X=x_init,
                                         Y=y_init,
                                         noise_var=noise,
                                         maximize=False)

        optimizer.run_optimization(max_iter=self.max_iterations)

        ins = optimizer.get_evaluations()[0]
        outs = optimizer.get_evaluations()[1].flatten()

        if not self.silent:
            print("\nThe optimal hyperparameters are:\nlr = %.5f\nnl = %d\nhs = %d" %
                  (10**optimizer.x_opt[0], optimizer.x_opt[1], optimizer.x_opt[2]))
            print()

        return optimizer.x_opt
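
A hedged usage sketch for the method above; `brnn` is a hypothetical instance of the enclosing class, and the log10 conversion follows the convention used in x_init:

# Hypothetical usage sketch (not part of the original example).
best = brnn.optimize()          # returns [log10(lr), nl, hs]
lr = 10 ** best[0]              # learning rate was searched in log10 space
nl = int(round(best[1]))        # number of layers
hs = int(round(best[2]))        # hidden vector size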
Example #2
# NOTE: the opening of this call was cut off in the excerpt; `objective`,
# `domain` and `kernel` are assumed to be defined earlier in the script.
optimizer = BayesianOptimization(f=objective,
                                 domain=domain,
                                 model_type='GP',
                                 kernel=kernel,
                                 acquisition_type='EI',
                                 maximize=False)

t0 = time.time()
optimizer.run_optimization(max_iter=200, max_time=7200)
t1 = time.time()
print("time:")
print(t1 - t0)

#optimizer.plot_acquisition()
optimizer.plot_convergence()

# get the candidate solutions and their evaluations
ins = optimizer.get_evaluations()[0]
outs = optimizer.get_evaluations()[1]
outputs = outs.flatten()
# sort in descending order
outputs.sort()
reverse_array = outputs[::-1]
#print(reverse_array)

plt.figure(figsize=(6.4, 4.8))
plt.plot(reverse_array, color='blue')
plt.xlabel("Iterations")
plt.ylabel("Objective Value")
# note: this plots the sorted objective values, not the running best per iteration
plt.title("Best Candidate Solution at each Iteration", fontsize='small')
plt.grid(color='skyblue', linestyle=':', linewidth=0.5)
plt.tight_layout()
plt.ylim(bottom=0)
plt.show()
Example #3
class Bayesian_Opt(object):
    def __init__(self,
                 x_train_features,
                 data_train,
                 n_sample_subset,
                 timesplitmethod,
                 model_choose,
                 loss_function,
                 log_transform,
                 fit_trend_method,
                 parameters_trend,
                 mode_dataset,
                 percentage_validation,
                 overlap,
                 bayes_tuning_on=True,
                 predict_intervals=False):
        self.x_train_features = x_train_features
        self.data_train = data_train
        self.opt_train_size = len(self.x_train_features[:, 0])  # 2D columnar x_train_features
        self.n_sample_subset = n_sample_subset
        self.timesplitmethod = timesplitmethod
        self.model_choose = model_choose
        self.predict_intervals = predict_intervals
        self.loss_function = loss_function
        self.data_train_log_trend = data_train
        (self.model_cv, self.domain, self.func, self.opt_search,
         self.predict_train, self.model_final) = [None] * 6
        self.fitted = 0
        self.pred_mod_intervals = 0
        self.log_transform = log_transform
        self.fit_trend_method = fit_trend_method
        self.parameters_trend = parameters_trend
        self.mode_dataset = mode_dataset
        self.decompose_model = None
        self.percentage_validation = percentage_validation
        self.overlap = overlap
        self.bayes_tuning_on = bayes_tuning_on

    # CREATE THE DECOMPOSE OBJECT: returns the time series with the trend removed
    def descompose_train_data(self):
        self.decompose_model = Decompose(self.x_train_features,
                                         self.data_train, self.log_transform,
                                         self.fit_trend_method,
                                         self.parameters_trend,
                                         self.mode_dataset)
        self.data_train_log_trend = self.decompose_model.descompose_train_data()

    # DEFINES THE REGRESSOR MODEL CHOSEN AND ITS DOMAIN
    def choose_model(self):
        self.model_cv = ModelRegressor(self.model_choose,
                                       self.predict_intervals)
        self.domain = self.model_cv.domains_models()

    # DEFINES THE OBJECTIVE FUNCTION: it will be passed to the BayesianOptimization class (GPyOpt)
    def func_to_optimize(self):
        # Declare the imported Objective_Func object
        self.func = Objective_Func(self.x_train_features, self.data_train,
                                   self.data_train_log_trend,
                                   self.n_sample_subset, self.timesplitmethod,
                                   self.opt_train_size, self.model_cv,
                                   self.loss_function, self.decompose_model,
                                   self.percentage_validation, self.overlap)

    # DEFINES THE BAYESIAN OPTIMIZATION OBJECT FOR THE EXTERNAL LIBRARY GPYOPT
    def Bayesdefinition(self):
        self.opt_search = BayesianOptimization(self.func.objective_function,
                                               domain=self.domain,
                                               model_type='GP',
                                               acquisition_type='EI',
                                               num_cores=-1,
                                               verbosity=False)

    # RUN THE BAYES OPTIMIZATION: call the previous setup methods and then run run_optimization.
    # Returns the optimal parameters for the chosen model and the Bayesian Optimization evaluations.
    def run(self, max_iter=100):
        self.descompose_train_data()
        self.choose_model()
        self.func_to_optimize()

        if self.bayes_tuning_on:
            self.Bayesdefinition()
            self.opt_search.run_optimization(max_iter)
            print('The run has finished correctly')
            #plot = self.opt_search.plot_convergence()
            self.opt_param = self.opt_search.x_opt  #Get the optimal parameters
            dict_param = {
                sentence['name']: self.opt_param[i]
                for i, sentence in enumerate(self.domain)
            }
            # Get the Bayes Optimization evaluations
            evaluations = self.opt_search.get_evaluations()[0]
            rmse_evaluations = self.opt_search.get_evaluations()[1]
            eval_columns = pd.DataFrame({
                sentence['name']: evaluations[:, i]
                for i, sentence in enumerate(self.domain)
            })
            eval_columns['rmse'] = rmse_evaluations

        else:
            print('Bayesian Optimization is not activated')
            self.opt_param = [x['domain'][0] for x in self.domain]
            dict_param = {
                sentence['name']: self.opt_param[i]
                for i, sentence in enumerate(self.domain)
            }
            eval_columns = []

        return [dict_param, eval_columns]

    #INITIALIZE THE MODEL CHOSEN WITH THE OPTIMAL PARAMETERS. It returns the final model object.
    def best_model(self):
        list_hyperparam_final = list(self.opt_param)

        if not self.predict_intervals:
            self.model_final = self.model_cv.regressor(list_hyperparam_final)

        # For this option we initialize 3 different models with the hyperparameters for three different quantiles.
        if self.predict_intervals:
            model_final_mid = self.model_cv.regressor(list_hyperparam_final)
            model_final_down = self.model_cv.regressor(list_hyperparam_final,
                                                       pred_lower_upper=-1)
            model_final_up = self.model_cv.regressor(list_hyperparam_final,
                                                     pred_lower_upper=1)

            self.model_final = [
                model_final_mid, model_final_down, model_final_up
            ]

        return self.model_final

    #  FIT THE DATA TO THE BEST MODEL
    def fit(self):
        if not self.predict_intervals:
            self.best_model().fit(self.x_train_features,
                                  self.data_train_log_trend)

        # For this option we fit the 3 different models for the three different quantiles
        if self.predict_intervals:
            [
                model.fit(self.x_train_features, self.data_train_log_trend)
                for model in self.best_model()
            ]

        self.fitted = 1
        print('The training fit is done')

    #MAKE THE PREDICTIONS ON THE OPTIMAL MODEL: returns the final prediction.
    # Enter x_features as a 2D columnar array where x_features[:, 0] is the series of days.
    def predict(self, x_features):
        if self.fitted == 1:
            final_prediction = None

            if not self.predict_intervals:
                prediction_without_trend = self.model_final.predict(
                    x_features).reshape(-1, 1)
                final_prediction = self.decompose_model.predict_compose(
                    x_features, prediction_without_trend)

            if self.predict_intervals:
                prediction_without_trend = [
                    model.predict(x_features).reshape(-1, 1)
                    for model in self.model_final
                ]
                final_prediction_mid = self.decompose_model.predict_compose(
                    x_features, prediction_without_trend[0])
                final_prediction_down = self.decompose_model.predict_compose(
                    x_features, prediction_without_trend[1])
                final_prediction_up = self.decompose_model.predict_compose(
                    x_features, prediction_without_trend[2])
                final_prediction = [
                    final_prediction_mid, final_prediction_down,
                    final_prediction_up
                ]

            return final_prediction
        else:
            print('You need to fit the model to your data')
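
A hedged usage sketch for the class above; every constructor argument value here is a placeholder, since the valid options depend on the user's own Decompose / ModelRegressor / Objective_Func modules:

# Hypothetical usage sketch (all argument values are placeholders/assumptions).
bo = Bayesian_Opt(x_train_features=X_train,      # 2D columnar array, X_train[:, 0] = days
                  data_train=y_train,
                  n_sample_subset=5,
                  timesplitmethod='expanding',
                  model_choose='gradient_boosting',
                  loss_function='rmse',
                  log_transform=True,
                  fit_trend_method='linear',
                  parameters_trend=None,
                  mode_dataset='daily',
                  percentage_validation=0.2,
                  overlap=False)
best_params, evaluations = bo.run(max_iter=50)   # Bayesian search over the model's domain
bo.fit()                                         # fit the best model on the detrended series
forecast = bo.predict(X_future)                  # recomposes the trend before returning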
Example #4
# create the plot
plt.plot(x, f_x, 'b-')
plt.show()

# plot previous line using streamlit
st.pyplot()

# ========== set up and run bayesian optimization ==========
# run bayesian optimization on charge
# the following 'domain' is a list of dictionaries describing the input variables (see the GPyOpt.core.space.Design_space class for details).
domain = [{'name': 'var_1', 'type': 'continuous', 'domain': (-5, 4)}]

# f = objective function for the Bayesian Optimization; domain = [refer to above]
myBopt_1d = BayesianOptimization(f=obj_func, domain=domain)
myBopt_1d.run_optimization(max_iter=5)
myBopt_1d.plot_acquisition()

# plot the acquisition function using streamlit
st.pyplot()

# ========== get the output of the bayesian optimization ==========

ins = myBopt_1d.get_evaluations()[0].flatten()
outs = myBopt_1d.get_evaluations()[1].flatten()
evals = pd.DataFrame({'x': ins, 'y': outs})

# display the evaluations table
st.write(evals)

st.markdown("The minumum value obtained by the function was %.4f (x = %.4f)" %
            (myBopt_1d.fx_opt, myBopt_1d.x_opt))
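
For context, GPyOpt evaluates the objective on 2D arrays of candidate points (one column per variable here); a minimal self-contained sketch of the expected obj_func signature, where the quadratic form is only an illustrative assumption:

# Hypothetical sketch of the objective-function convention used above.
import numpy as np
from GPyOpt.methods import BayesianOptimization

def toy_obj_func(x):
    # x has shape (n_points, 1); return one value per row
    return (x[:, 0] - 1.0) ** 2

toy_domain = [{'name': 'var_1', 'type': 'continuous', 'domain': (-5, 4)}]
toy_opt = BayesianOptimization(f=toy_obj_func, domain=toy_domain)
toy_opt.run_optimization(max_iter=5)
print(toy_opt.x_opt, toy_opt.fx_opt)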
Example #5
class PipelineOpt:
    def __init__(
        self,
        static_imputation_model_list,
        temporal_imputation_model_list,
        static_feature_selection_model_list,
        temporal_feature_selection_model_list,
        prediction_model_list,
        dataset_training,
        dataset_testing,
        task,
        metric_name,
        metric_parameters,
    ):
        self.dataset_testing = dataset_testing
        self.dataset_training = dataset_training
        self.static_imputation_model_list = static_imputation_model_list
        self.temporal_imputation_model_list = temporal_imputation_model_list
        self.static_feature_selection_model_list = static_feature_selection_model_list
        self.temporal_feature_selection_model_list = temporal_feature_selection_model_list
        self.prediction_model_list = prediction_model_list

        # imputation

        static_imputation_list = [
            imputation.Imputation(imputation_model_name=x, data_type="static")
            for x in static_imputation_model_list
        ]
        temporal_imputation_list = [
            imputation.Imputation(imputation_model_name=x,
                                  data_type="temporal")
            for x in temporal_imputation_model_list
        ]

        # feature selection

        static_feature_selection_list = []
        for x in static_feature_selection_model_list:
            # Select relevant features
            static_feature_selection = FeatureSelection(
                feature_selection_model_name=x[0],
                feature_type="static",
                feature_number=x[1],
                task=task,
                metric_name=metric_name,
                metric_parameters=metric_parameters,
            )
            static_feature_selection_list.append(static_feature_selection)

        temporal_feature_selection_list = []
        for x in temporal_feature_selection_model_list:
            # Select relevant features
            temporal_feature_selection = FeatureSelection(
                feature_selection_model_name=x[0],
                feature_type="temporal",
                feature_number=x[1],
                task=task,
                metric_name=metric_name,
                metric_parameters=metric_parameters,
            )
            temporal_feature_selection_list.append(temporal_feature_selection)

        # prediction
        pred_class_list = []

        # Set predictive model
        model_name_list = prediction_model_list

        for model_name in model_name_list:
            # Set model parameters
            model_parameters = {
                "h_dim": 100,
                "n_layer": 2,
                "n_head": 2,
                "batch_size": 128,
                "epoch": 2,
                "model_type": model_name,
                "learning_rate": 0.001,
                "static_mode": "Concatenate",
                "time_mode": "Concatenate",
                "verbose": False,
            }

            # Set up the predictive model (it is trained later, inside f)
            pred_class = prediction(model_name, model_parameters, task)
            pred_class_list.append(pred_class)

        self.pred_class_list = pred_class_list
        self.temporal_feature_selection_list = temporal_feature_selection_list
        self.static_feature_selection_list = static_feature_selection_list
        self.temporal_imputation_list = temporal_imputation_list
        self.static_imputation_list = static_imputation_list
        self.domain = [
            {
                "name": "static_imputation",
                "type": "discrete",
                "domain": list(range(len(static_imputation_list)))
            },
            {
                "name": "temporal_imputation",
                "type": "discrete",
                "domain": list(range(len(temporal_imputation_list)))
            },
            {
                "name": "static_feature_selection",
                "type": "discrete",
                "domain": list(range(len(static_feature_selection_list))),
            },
            {
                "name": "temporal_feature_selection",
                "type": "discrete",
                "domain": list(range(len(temporal_feature_selection_list))),
            },
            {
                "name": "pred_class",
                "type": "discrete",
                "domain": list(range(len(pred_class_list)))
            },
        ]
        self.myBopt = BayesianOptimization(f=self.f, domain=self.domain)

    def run_opt(self, steps):
        self.myBopt.run_optimization(max_iter=steps)
        opt_sol, opt_obj = self.myBopt.get_evaluations()
        sol = np.where(opt_obj.flatten() == opt_obj.min())
        ind = sol[0]
        best_model = opt_sol[ind]
        best_obj = opt_obj.min()
        best_model = best_model.flatten()
        best_model_list = [
            self.static_imputation_model_list[int(best_model[0])],
            self.temporal_imputation_model_list[int(best_model[1])],
            self.static_feature_selection_model_list[int(best_model[2])],
            self.temporal_feature_selection_model_list[int(best_model[3])],
            self.prediction_model_list[int(best_model[4])],
        ]
        return best_model_list, best_obj

    def f(self, a):
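        # GPyOpt passes a 2D array of shape (1, n_dims); a[0] is the single candidate row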
        si, ti, sf, tf, pc = a[0]
        try:
            static_imputation = self.static_imputation_list[int(si)]
            temporal_imputation = self.temporal_imputation_list[int(ti)]
            static_feature_selection = self.static_feature_selection_list[int(sf)]
            temporal_feature_selection = self.temporal_feature_selection_list[int(tf)]
            pred_class = self.pred_class_list[int(pc)]

            pipeline = PipelineComposer(static_imputation, temporal_imputation,
                                        static_feature_selection,
                                        temporal_feature_selection)

            dataset_training = pipeline.fit_transform(self.dataset_training)
            dataset_testing = pipeline.transform(self.dataset_testing)

            # only do once
            if not dataset_training.is_validation_defined:
                dataset_training.train_val_test_split(prob_val=0.2,
                                                      prob_test=0.0)

            # Set up validation for early stopping and best model saving
            pred_class.fit(dataset_training)
            # Return the predictions on the testing set
            test_y_hat = pred_class.predict(dataset_testing)
            metric = BOMetric(metric="auc", fold=0, split="test")
            met_val = metric.eval(dataset_testing, test_y_hat)
            met_val = met_val[met_val != 0].mean()
        except Exception:
            met_val = 1e-9
        return met_val
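
A hedged usage sketch for the pipeline search above; the model names, feature-selection tuples and datasets are placeholders that should be replaced with options supported by the user's imputation, feature-selection and prediction modules:

# Hypothetical usage sketch (model names and datasets are placeholders).
opt = PipelineOpt(
    static_imputation_model_list=['mean', 'median'],
    temporal_imputation_model_list=['mean', 'interpolation'],
    static_feature_selection_model_list=[('greedy-addition', 10)],
    temporal_feature_selection_model_list=[('greedy-addition', 10)],
    prediction_model_list=['rnn', 'attention'],
    dataset_training=dataset_training,
    dataset_testing=dataset_testing,
    task='classification',
    metric_name='auc',
    metric_parameters={'problem': 'online', 'label_name': ['label']},
)
best_pipeline, best_score = opt.run_opt(steps=20)
print(best_pipeline, best_score)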
Example #6
# NOTE: the start of this list was cut off in the excerpt; the 'C' entry and its
# domain below are an assumed reconstruction based on how params[0] is used in svr_val.
parameters = [{'name': 'C',      'type': 'continuous', 'domain': (0.0001, 100)},
              {'name': 'gamma',  'type': 'continuous', 'domain': (0.0001, 100)}]

def svr_val(trans):
    def f(params):
        params = params[0]
        clf = SVC(C=params[0], kernel='poly', gamma=params[1])
        return eval(clf, trans)
    return f
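
Note that `eval` above shadows the Python builtin and refers to a user-defined scoring helper that is not shown in this excerpt; a hedged sketch of what it might look like, assuming `trans` holds an (X_train, y_train, X_val, y_val) split:

# Hypothetical stand-in for the user-defined eval(clf, trans) helper; the
# (X_train, y_train, X_val, y_val) layout of `trans` is an assumption.
def eval(clf, trans):
    X_train, y_train, X_val, y_val = trans
    clf.fit(X_train, y_train)
    return clf.score(X_val, y_val)  # validation accuracy, maximized by the optimizer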

print("Running optimization on the models.")
print("================================================================")
# Define a dictionary to store the results of the Bayesian optimization
results, count = {}, 0
for key, val in transforms.items():
    print("Running optimization for model: {}".format(key))
    print("This is {} of {} optimizations.".format(count + 1, n_models))
    print("---------------------------------")
    opt = BayesianOptimization(f=svr_val(val),
                               domain=parameters,
                               initial_design_numdata=30,
                               num_cores=10,
                               maximize=True)
    opt.run_optimization(max_iter=30)
    results[key] = opt.get_evaluations()
    # Pickle the results so we can go through and find the best model. Do this
    # inside the loop so that we can recover partial results before completion if needed.
    with open('svc_opt_results.pickle', 'wb') as handle:
        pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)
    # Update count
    count += 1
Example #7
class optimise:
    def __init__(self, n, hf_model, lf_model, hf_max_iter, lf_max_iter, EI_req=10**7):
        """
        model options: beam, unit, equiv_cant
        """
        self.n = n
        self.hf_model = hf_model
        self.lf_model = lf_model
        self.EI_req = EI_req
        self.hf_max_iter = hf_max_iter
        self.lf_max_iter = lf_max_iter
        self.results = {'n': self.n, 'lf model': self.lf_model, 'hf iter': self.hf_max_iter, 'lf iter': self.lf_max_iter}

        node_domain = [{'name':'node_coord', 'type':'continuous', 'domain':(0, 1)}]
        d_domain = [{'name':'member_d', 'type':'continuous', 'domain':(0, 0.1)}]
        self.domain = node_domain*n*3 + d_domain*int((n+1)*8+n*(n-1)/2)
    
    def run_hf_opt(self):
        def f_hf(X):
            return structure(self.hf_model, self.n, X[0], self.EI_req).score
        self.hf_opt = BayesianOptimization(f_hf,
                             domain=self.domain,
                             acquisition_type="EI",
                             model_type='GP',
                             exact_feval=True)
        self.hf_opt.run_optimization(max_iter=self.hf_max_iter, eps=1e-6)
        self.X_H, self.y_H = self.hf_opt.get_evaluations()

    def run_lf_opt(self):
        def f_lf(X):
            return structure(self.lf_model, self.n, X[0], self.EI_req).score
        self.lf_opt = BayesianOptimization(f_lf,
                             domain=self.domain,
                             acquisition_type="EI",
                             model_type='GP',
                             exact_feval=True)
        self.lf_opt.run_optimization(max_iter=self.lf_max_iter, eps=1e-6)
        self.X_L, self.y_L = self.lf_opt.get_evaluations()
    
    def run_mf_opt(self):
        # keep only the evaluations below the 1e9 penalty value
        mask_L = (self.y_L < 1 * 10**9).T[0]
        mask_H = (self.y_H < 1 * 10**9).T[0]
        X_L, y_L = self.X_L[mask_L, :], self.y_L[mask_L, :]
        X_H, y_H = self.X_H[mask_H, :], self.y_H[mask_H, :]
        mf_model = Multifidelity_GP(X_L, y_L, X_H, y_H)
        # retry GP training with a larger jitter after a LinAlgError, up to jitter = 0.1
        while mf_model.jitter <= 0.1:
            try:
                mf_model.train()
            except np.linalg.LinAlgError:
                mf_model.jitter = mf_model.jitter*10
            else:
                print(mf_model.jitter)
                break

        def f_mf(X):
            X = np.atleast_2d(X)
            y_pred, y_var = mf_model.predict(X)
            if y_pred[0][0] > 0:
                return y_pred[0][0]
            else:
                return 10**9
        
        # local refinement of the high-fidelity optimum (presumably scipy.optimize.minimize)
        self.mf_opt = minimize(f_mf, self.hf_opt.x_opt, bounds=[i['domain'] for i in self.domain])
    
    def report_opt_results(self):
        mf = structure(self.hf_model, self.n, self.mf_opt.x, self.EI_req)
        self.results['mf score'] = mf.score
        self.results['mf mass'] = mf.mass
        self.results['mf EI'] = mf.EI
        self.results['mf equiv score'] = mf.equiv_score
        self.results['mf equiv EI'] = mf.equiv_EI
        hf = structure(self.hf_model, self.n, self.hf_opt.x_opt, self.EI_req)
        self.results['hf score'] = hf.score
        self.results['hf mass'] = hf.mass
        self.results['hf EI'] = hf.EI
        self.results['hf equiv score'] = hf.equiv_score
        self.results['hf equiv EI'] = hf.equiv_EI
        lf = structure(self.hf_model, self.n, self.lf_opt.x_opt, self.EI_req)
        self.results['lf score'] = lf.score
        self.results['lf mass'] = lf.mass
        self.results['lf EI'] = lf.EI
        self.results['lf equiv score'] = lf.equiv_score
        self.results['lf equiv EI'] = lf.equiv_EI
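
A hedged usage sketch for the class above; the model labels follow the options listed in the docstring, while the iteration counts are arbitrary:

# Hypothetical usage sketch (model choices and iteration counts are placeholders).
opt = optimise(n=3, hf_model='beam', lf_model='equiv_cant',
               hf_max_iter=50, lf_max_iter=200)
opt.run_hf_opt()            # high-fidelity Bayesian optimisation
opt.run_lf_opt()            # low-fidelity Bayesian optimisation
opt.run_mf_opt()            # multi-fidelity GP plus a local refinement step
opt.report_opt_results()
print(opt.results)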