Example #1
import os
import json
from pickle import PicklingError

import matplotlib as mpl
from skopt import dump

# skopt_plots, SerializeSKOptResults and clear_weights are assumed to be
# project-local helpers defined elsewhere in this module


def post_process_skopt_results(skopt_results, results, opt_path):
    mpl.rcParams.update(mpl.rcParamsDefault)

    skopt_plots(skopt_results, pref=opt_path)

    fname = os.path.join(opt_path, 'gp_parameters')

    sr_res = SerializeSKOptResults(skopt_results)

    # clear_weights is always called with the results; when the first
    # (key, value) pair does not mention 'folder', stray weights in the
    # optimization directory are cleared as well
    clear_weights(results=results, opt_dir=opt_path)
    if 'folder' not in list(results.items())[0]:
        clear_weights(opt_dir=opt_path)

    try:
        dump(skopt_results, os.path.join(opt_path, os.path.basename(opt_path)))
    except PicklingError:
        print("could not pickle results")

    try:
        with open(fname + '.json', 'w') as fp:
            json.dump(sr_res.serialized_results, fp, sort_keys=True, indent=4)
    except TypeError:
        with open(fname + '.json', 'w') as fp:
            json.dump(str(sr_res.serialized_results),
                      fp,
                      sort_keys=True,
                      indent=4)

    return
Example #2
    def __call__(self, res):
        """
        Parameters
        ----------
        * `res` [`OptimizeResult`, scipy object]:
            The optimization as an OptimizeResult object.
        """
        dump(res, self.checkpoint_path, **self.dump_options)
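For context, this `__call__` is the hook that skopt's `CheckpointSaver` callback invokes after each iteration. A minimal usage sketch, assuming a toy objective `f` (not part of the example above):

# Register a CheckpointSaver with gp_minimize, then resume from the file
from skopt import gp_minimize, load
from skopt.callbacks import CheckpointSaver

def f(x):
    return (x[0] - 2.0) ** 2

saver = CheckpointSaver("./checkpoint.pkl", store_objective=False)
gp_minimize(f, [(-5.0, 5.0)], n_calls=20, callback=[saver])

res = load("./checkpoint.pkl")  # the last OptimizeResult saved by the callback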
Example #4
from skopt import dump  # `dump` assumed to come from skopt, as in the other examples


def _export_results_object(results):
    from io import BytesIO

    results.specs['args'].pop('callback', None)

    buffer = BytesIO()
    dump(results, buffer, store_objective=False)
    buffer.seek(0)

    return buffer
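Because `dump` here is backed by joblib, which accepts file objects as well as paths, the buffer returned above can be read back in memory. A minimal sketch, assuming a `results` object from an earlier run:

# Hypothetical round trip for the in-memory buffer returned above
from skopt import load

buffer = _export_results_object(results)  # `results` from a prior optimization
restored = load(buffer)  # the joblib-backed load accepts file objects
print(restored.x, restored.fun)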
Example #5
import numpy as np
from skopt import gp_minimize, forest_minimize, gbrt_minimize, dump
from skopt.benchmarks import branin


def run(n_calls=50, n_runs=5, acq_optimizer="lbfgs"):
    bounds = [(-5.0, 10.0), (0.0, 15.0)]
    optimizers = [("gp_minimize", gp_minimize),
                  ("forest_minimize", forest_minimize),
                  ("gbrt_minimize", gbrt_minimize)]

    for name, optimizer in optimizers:
        print(name)
        results = []
        min_func_calls = []
        time_ = 0.0

        for random_state in range(n_runs):
            if name == "gp_minimize":
                res = optimizer(branin,
                                bounds,
                                random_state=random_state,
                                n_calls=n_calls,
                                noise=1e-10,
                                verbose=True,
                                acq_optimizer=acq_optimizer,
                                n_jobs=-1)
            else:
                res = optimizer(branin,
                                bounds,
                                random_state=random_state,
                                n_calls=n_calls,
                                acq_optimizer=acq_optimizer)
            results.append(res)
            print("Dumping results of run %d" % random_state)
            dump(res, "%d_run" % random_state)
            min_func_calls.append(np.argmin(res.func_vals) + 1)

        optimal_values = [result.fun for result in results]
        mean_optimum = np.mean(optimal_values)
        std = np.std(optimal_values)
        best = np.min(optimal_values)
        print("Mean optimum: " + str(mean_optimum))
        print("Std of optimal values" + str(std))
        print("Best optima:" + str(best))

        mean_fcalls = np.mean(min_func_calls)
        std_fcalls = np.std(min_func_calls)
        best_fcalls = np.min(min_func_calls)
        print("Mean func_calls to reach min: " + str(mean_fcalls))
        print("Std func_calls to reach min: " + str(std_fcalls))
        print("Fastest no of func_calls to reach min: " + str(best_fcalls))
Example #6
    def fit(self,
            X,
            Y,
            total_duration=6e7,
            n_iter=100,
            cv_iter=None,
            optimizer=None,
            acq_func='gp_hedge',
            **kwargs):
        start = datetime.now()

        def splitter(itr):
            for train_idx, test_idx in itr:
                yield X[train_idx], Y[train_idx], X[test_idx], Y[test_idx]

        def splitter_dict(itr_dict):

            n_splits = len(list(itr_dict.values())[0])
            for i in range(n_splits):
                X_train = dict()
                Y_train = dict()
                X_test = dict()
                Y_test = dict()
                for n_obj, itr in itr_dict.items():
                    train_idx = itr[i][0]
                    test_idx = itr[i][1]
                    X_train[n_obj] = np.copy(X[n_obj][train_idx])
                    X_test[n_obj] = np.copy(X[n_obj][test_idx])
                    Y_train[n_obj] = np.copy(Y[n_obj][train_idx])
                    Y_test[n_obj] = np.copy(Y[n_obj][test_idx])
                yield X_train, Y_train, X_test, Y_test

        if cv_iter is None:
            cv_iter = ShuffleSplit(n_splits=3,
                                   test_size=0.1,
                                   random_state=self.random_state)
        if isinstance(X, dict):
            splits = dict()
            for n_obj, arr in X.items():
                if arr.shape[0] == 1:
                    splits[n_obj] = [([0], [0])
                                     for i in range(cv_iter.n_splits)]
                else:
                    splits[n_obj] = list(cv_iter.split(arr))
        else:
            splits = list(cv_iter.split(X))
        # Pre-compute splits for reuse
        # Here we fix a random seed for all simulations to correlate the random
        # streams:

        seed = self.random_state.randint(2**32, dtype='uint32')
        self.logger.debug(
            'Random seed for the ranking algorithm: {}'.format(seed))
        opt_seed = self.random_state.randint(2**32, dtype='uint32')
        self.logger.debug('Random seed for the optimizer: {}'.format(opt_seed))
        gp_seed = self.random_state.randint(2**32, dtype='uint32')
        self.logger.debug(
            'Random seed for the GP surrogate: {}'.format(gp_seed))

        if optimizer is not None:
            opt = optimizer
            self.logger.debug('Setting the provided optimizer')
            self.log_best_params(opt)
        else:
            transformed = []
            for param in self.parameter_ranges:
                transformed.append(check_dimension(param))
            self.logger.info("Parameter Space: {}".format(transformed))
            space = normalize_dimensions(transformed)
            self.logger.info(
                "Parameter Space after transformation: {}".format(space))

            # Todo: Make this passable
            base_estimator = cook_estimator("GP",
                                            space=space,
                                            random_state=gp_seed,
                                            noise="gaussian")
            opt = Optimizer(dimensions=self.parameter_ranges,
                            random_state=opt_seed,
                            base_estimator=base_estimator,
                            acq_func=acq_func,
                            **kwargs)
        self._callbacks_set_optimizer(opt)
        self._callbacks_on_optimization_begin()
        time_taken = duration_tillnow(start)
        total_duration -= time_taken
        max_fit_duration = -10000
        self.logger.info('Time left for {} iterations is {}'.format(
            n_iter, microsec_to_time(total_duration)))

        try:
            for t in range(n_iter):
                start = datetime.now()
                self._callbacks_on_iteration_begin(t)
                self.logger.info(
                    'Starting optimization iteration: {}'.format(t))
                if t > 0:
                    self.log_best_params(opt)

                next_point = opt.ask()
                self.logger.info('Next parameters:\n{}'.format(next_point))
                results = []
                running_times = []
                if isinstance(X, dict):
                    for X_train, Y_train, X_test, Y_test in splitter_dict(
                            splits):
                        result, time_taken = self._fit_ranker(
                            X_train, Y_train, X_test, Y_test, next_point)
                        running_times.append(time_taken)
                        results.append(result)
                else:
                    for X_train, Y_train, X_test, Y_test in splitter(splits):
                        result, time_taken = self._fit_ranker(
                            X_train, Y_train, X_test, Y_test, next_point)
                        running_times.append(time_taken)
                        results.append(result)

                results = np.array(results)
                running_times = np.array(running_times)
                mean_result = np.mean(results)
                mean_fitting_duration = np.mean(running_times)

                # Storing the maximum time to run the splitting model and adding the time for out of sample evaluation
                if max_fit_duration < np.sum(running_times):
                    max_fit_duration = np.sum(running_times)

                self.logger.info(
                    'Validation error for the parameters is {:.4f}'.format(
                        mean_result))
                self.logger.info('Time taken for the parameters is {}'.format(
                    microsec_to_time(np.sum(running_times))))
                if "ps" in opt.acq_func:
                    opt.tell(next_point, [mean_result, mean_fitting_duration])
                else:
                    opt.tell(next_point, mean_result)
                self._callbacks_on_iteration_end(t)

                self.logger.info(
                    "Main optimizer iterations done {} and saving the model".
                    format(np.array(opt.yi).shape[0]))
                dump(opt, self.optimizer_path)

                time_taken = duration_tillnow(start)
                total_duration -= time_taken
                self.logger.info('Time left for simulations is {} '.format(
                    microsec_to_time(total_duration)))

                if (total_duration - max_fit_duration) < 0:
                    self.logger.info(
                        'Maximum time required by the model to validate parameter values: {}'
                        .format(microsec_to_time(max_fit_duration)))
                    self.logger.info(
                        'At iteration {} simulation stops, due to time deficiency'
                        .format(t))
                    break

        except KeyboardInterrupt:
            self.logger.debug(
                'Optimizer interrupted saving the model at {}'.format(
                    self.optimizer_path))
            self.log_best_params(opt)
        else:
            self.logger.debug(
                'Finally, fit a model on the complete training set and storing the model at {}'
                .format(self.optimizer_path))
            self._fit_params["epochs"] = self._fit_params.get("epochs", 1000)
            if "ps" in opt.acq_func:
                best_point = opt.Xi[np.argmin(np.array(opt.yi)[:, 0])]
            else:
                best_point = opt.Xi[np.argmin(opt.yi)]
            self._set_new_parameters(best_point)
            self.model = copy.copy(self.ranker)
            self.model.fit(X, Y, **self._fit_params)

        finally:
            self._callbacks_on_optimization_end()
            self.optimizer = opt
            if np.array(opt.yi).shape[0] != 0:
                dump(opt, self.optimizer_path)
Example #7
import json

import adaptive
from skopt import gp_minimize
from skopt.utils import dump

from openmc_model import objective

# Optimisation for 2D EXAMPLE

# Uses adaptive sampling methods from task 8 to obtain starting points for the optimiser
learner = adaptive.Learner2D(objective, bounds=[(0, 100), (0, 100)])
runner = adaptive.Runner(learner, ntasks=1, goal=lambda l: l.npoints > 30)
runner.ioloop.run_until_complete(runner.task)

# Gaussian-process-based optimisation that returns a SciPy optimisation object
res = gp_minimize(
    objective,  # the function to minimize
    dimensions=[(0., 100.), (0., 100.)],  # the bounds on each dimension of x
    n_calls=40,  # the number of evaluations of f
    n_random_starts=0,  # the number of random initialization points
    verbose=True,
    x0=list(learner.data.keys()),  # initial points from the adaptive sampling method
    y0=list(learner.data.values())  # initial values from the adaptive sampling method
)

# Saves the optimisation results to a file
dump(res, 'saved_optimisation_2d.dat')
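The saved result can later be reloaded, for example to warm-start a follow-up run. A minimal sketch, assuming the file written above:

# Reload the OptimizeResult and continue optimising from its evaluations
from skopt import gp_minimize, load

prev = load('saved_optimisation_2d.dat')
res2 = gp_minimize(objective,
                   dimensions=[(0., 100.), (0., 100.)],
                   n_calls=10,
                   x0=prev.x_iters,  # reuse all previously evaluated points
                   y0=list(prev.func_vals))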
Example #8
    def fit(self,
            X,
            Y,
            total_duration=600,
            n_iter=100,
            cv_iter=None,
            acq_func='gp_hedge',
            **kwargs):
        start = datetime.now()

        def splitter(itr):
            for train_idx, test_idx in itr:
                yield X[train_idx], Y[train_idx], X[test_idx], Y[test_idx]

        def splitter_dict(itr_dict):
            n_splits = len(list(itr_dict.values())[0])
            for i in range(n_splits):
                X_train = dict()
                Y_train = dict()
                X_test = dict()
                Y_test = dict()
                for n_obj, itr in itr_dict.items():
                    train_idx = itr[i][0]
                    test_idx = itr[i][1]
                    X_train[n_obj] = np.copy(X[n_obj][train_idx])
                    X_test[n_obj] = np.copy(X[n_obj][test_idx])
                    Y_train[n_obj] = np.copy(Y[n_obj][train_idx])
                    Y_test[n_obj] = np.copy(Y[n_obj][test_idx])
                yield X_train, Y_train, X_test, Y_test

        if cv_iter is None:
            cv_iter = ShuffleSplit(n_splits=3,
                                   test_size=0.1,
                                   random_state=self.random_state)
        if isinstance(X, dict):
            splits = dict()
            for n_obj, arr in X.items():
                if arr.shape[0] == 1:
                    splits[n_obj] = [([0], [0])
                                     for i in range(cv_iter.n_splits)]
                else:
                    splits[n_obj] = list(cv_iter.split(arr))
        else:
            splits = list(cv_iter.split(X))
        # Pre-compute splits for reuse
        # Here we fix a random seed for all simulations to correlate the random
        # streams:

        seed = self.random_state.randint(2**32, dtype='uint32')
        self.logger.debug(
            'Random seed for the ranking algorithm: {}'.format(seed))
        opt_seed = self.random_state.randint(2**32, dtype='uint32')
        self.logger.debug('Random seed for the optimizer: {}'.format(opt_seed))
        gp_seed = self.random_state.randint(2**32, dtype='uint32')
        self.logger.debug(
            'Random seed for the GP surrogate: {}'.format(gp_seed))
        n_iter = self.set_optimizer(n_iter, opt_seed, acq_func, gp_seed,
                                    **kwargs)

        self._callbacks_set_optimizer(self.opt)
        self._callbacks_on_optimization_begin()
        time_taken = duration_till_now(start)
        total_duration -= time_taken
        max_fit_duration = -np.inf
        self.logger.info('Time left for {} iterations is {}'.format(
            n_iter, seconds_to_time(total_duration)))

        try:
            for t in range(n_iter):
                if total_duration <= 0:
                    break
                start = datetime.now()
                self._callbacks_on_iteration_begin(t)
                self.logger.info(
                    'Starting optimization iteration: {}'.format(t))
                if t > 0:
                    self.log_best_params()
                next_point = self.opt.ask()
                self.logger.info('Next parameters:\n{}'.format(next_point))
                results = []
                running_times = []
                if isinstance(X, dict):
                    for X_train, Y_train, X_test, Y_test in splitter_dict(
                            splits):
                        result, time_taken = self._fit_ranker(
                            X_train, Y_train, X_test, Y_test, next_point)
                        running_times.append(time_taken)
                        results.append(result)
                else:
                    for X_train, Y_train, X_test, Y_test in splitter(splits):
                        result, time_taken = self._fit_ranker(
                            X_train, Y_train, X_test, Y_test, next_point)
                        running_times.append(time_taken)
                        results.append(result)

                results = np.array(results)
                running_times = np.array(running_times)
                mean_result = np.mean(results)
                mean_fitting_duration = np.mean(running_times)

                # Storing the maximum time to run the splitting model and adding the time for out of sample evaluation
                if max_fit_duration < np.sum(running_times):
                    max_fit_duration = np.sum(running_times)

                self.logger.info(
                    'Validation error for the parameters is {:.4f}'.format(
                        mean_result))
                self.logger.info('Time taken for the parameters is {}'.format(
                    seconds_to_time(np.sum(running_times))))
                if "ps" in self.opt.acq_func:
                    self.opt.tell(next_point,
                                  [mean_result, mean_fitting_duration])
                else:
                    self.opt.tell(next_point, mean_result)
                self._callbacks_on_iteration_end(t)

                self.logger.info(
                    "Main optimizer iterations done {} and saving the model".
                    format(np.array(self.opt.yi).shape[0]))
                dump(self.opt, self.optimizer_path)

                time_taken = duration_till_now(start)
                total_duration -= time_taken
                self.logger.info('Time left for simulations is {} '.format(
                    seconds_to_time(total_duration)))

                # Delete Tensorflow graph, to prevent memory leaks:
                K.clear_session()
                sess = tf.Session()
                K.set_session(sess)
                if (total_duration - max_fit_duration) < 0:
                    self.logger.info(
                        'Maximum time required by model to validate parameter values {}'
                        .format(seconds_to_time(max_fit_duration)))
                    self.logger.info(
                        'At iteration {} simulation stops, due to time deficiency'
                        .format(t))
                    break

        except KeyboardInterrupt:
            self.logger.debug(
                'Optimizer interrupted saving the model at {}'.format(
                    self.optimizer_path))
            self.log_best_params()
        else:
            self.logger.debug(
                'Finally, fit a model on the complete training set and storing the model at {}'
                .format(self.optimizer_path))

        finally:
            K.clear_session()
            sess = tf.Session()
            K.set_session(sess)
            self._callbacks_on_optimization_end()
            # self._fit_params["epochs"] = np.min([self._fit_params.get("epochs", 500) * 2, 1000])
            if "ps" in self.opt.acq_func:
                best_point = self.opt.Xi[np.argmin(
                    np.array(self.opt.yi)[:, 0])]
            else:
                best_point = self.opt.Xi[np.argmin(self.opt.yi)]
            self._set_new_parameters(best_point)
            self.model = copy.copy(self.learner)
            self.model.fit(X, Y, **self._fit_params)
            if np.array(self.opt.yi).shape[0] != 0:
                dump(self.opt, self.optimizer_path)
Example #9
    'optimizer': torch.optim.Adam,
    'distance_metric': 'cosine'
}
mini_func = gp_minimize
optimize_types = ['subspace_dim', 'beta', 'lr', 'mini_batch_size']
minimizer = Minimizer(base_workspace, optimize_types, mini_func)

dims = [4, 8, 16, 32, 64, 128]
for dim in dims:

    # order should be the same as the "optimize_types"
    space = [
        Categorical([dim]),
        Integer(1, 30),
        Real(10**-5, 10**0, "log-uniform"),
        Categorical([32, 64, 128, 256, 512])
    ]

    x0 = [dim, 6, 0.005, 512]
    checkpoint_fname = '_'.join([emb_fname, str(dim)]) + '_checkpoint.pkl'
    checkpoint_callback = CheckpointSaver(checkpoint_fname,
                                          store_objective=False)

    res = minimizer.minimize(space,
                             n_calls=30,
                             verbose=True,
                             x0=x0,
                             callback=[checkpoint_callback])
    results_fname = '_'.join(['results', emb_fname, str(dim)])
    dump(res, results_fname + '.pkl', store_objective=False)
Example #10
import numpy as np
from sklearn.model_selection import cross_val_score
from skopt import load, dump
from skopt.utils import use_named_args

# `reg`, `X`, `y` and `space` are assumed to be defined earlier in the script

# this decorator allows your objective function to receive the parameters as
# keyword arguments, which is particularly convenient when you want to set
# scikit-learn estimator parameters
@use_named_args(space)
def objective(**params):
    reg.set_params(**params)

    return -np.mean(
        cross_val_score(
            reg, X, y, cv=5, n_jobs=-1, scoring="neg_mean_absolute_error"))


from skopt import gp_minimize

# checkpoint_saver = CheckpointSaver("test_checkpoint.pkl", store_objective=False)

# res_gp = gp_minimize(objective, space, n_calls=50, callback=[checkpoint_saver], random_state=0)
# res_gp.specs['args']['func'] = None
# dump(res_gp,'test_skopt.pkl')

res = load('test_checkpoint.pkl')
res_gp = gp_minimize(objective,
                     space,
                     x0=res.x_iters,
                     y0=res.func_vals,
                     n_calls=10,
                     random_state=0,
                     verbose=True)
res_gp.specs['args']['func'] = None
dump(res_gp, 'test_skopt2.pkl')
Example #11
    def hyper_parameter_optimization_process(
            self,
            train_x: 'np.array',
            train_y: 'np.array',
            val_x: 'np.array',
            val_y: 'np.array',
            model_epochs: int = 10,
            n_calls: int = 11,
            acq_func: str = 'gp_hedge',
            verbose: bool = True,
            kappa: float = 1.96,
            noise: float = 0.01,
            n_jobs: int = -1) -> 'Skopt HPO Object':
        """Conducts hyperparameter optimization process & return its results"""

        # Initializing concerned hyper-parameters
        dim_learning_rate = Real(low=1e-4,
                                 high=1e-2,
                                 prior='log-uniform',
                                 name='learning_rate')
        dim_num_lstm_nodes = Integer(low=20, high=300, name='num_lstm_nodes')
        dim_dropout = Real(low=0.1,
                           high=0.9,
                           prior='log-uniform',
                           name='dropout')
        dim_batch_size = Integer(low=1, high=128, name='batch_size')

        # Setting default values for the concerned hyperparameters
        default_parameters = [1e-3, 50, 0.1, 64]
        dimensions = [
            dim_learning_rate, dim_num_lstm_nodes, dim_dropout, dim_batch_size
        ]

        # Fitness function as objective function served to HPO process
        @use_named_args(dimensions=dimensions)
        def fitness(learning_rate, num_lstm_nodes, dropout, batch_size):

            # Setting seed and clearing model graphs in backend
            tf.random.set_seed(SimpleCNN.seed_num)
            K.clear_session()
            tf.compat.v1.reset_default_graph()

            # Initializing model, compiling & training it
            model = self.generate_model(use_optimised_hyperparameters=False,
                                        dropout=dropout,
                                        number_of_lstm_nodes=num_lstm_nodes)
            optimizer = SGD(learning_rate=learning_rate)
            model.compile(loss='mean_squared_error', optimizer=optimizer)
            model.fit(train_x,
                      train_y,
                      batch_size=batch_size,
                      epochs=model_epochs,
                      verbose=2,
                      shuffle=False,
                      validation_data=(val_x, val_y))

            # Generating prediction on Validation data
            validation_data_prediction = model.predict(val_x)
            # Calculating MSE for the model trained with candidate Hyperparameters of this iteration
            mse_validation = mean_squared_error(val_y,
                                                validation_data_prediction)

            # Deleting created model
            del model
            return mse_validation

        hpo_result = gp_minimize(func=fitness,
                                 dimensions=dimensions,
                                 acq_func=acq_func,
                                 n_calls=n_calls,
                                 noise=noise,
                                 n_jobs=n_jobs,
                                 kappa=kappa,
                                 x0=default_parameters,
                                 verbose=verbose)

        self.hpo_result = hpo_result
        # Storing optimised Hyperparameters
        dump(hpo_result,
             self.path_to_save_hyperparameters,
             store_objective=False)
        # Generating Dataframe of all HPO process candidates and their MSE
        hpo_iterations_df = mu.generate_hyperparameter_optimization_iterations_df(
            hpo_result=hpo_result,
            columns=["Learning Rate", "# LSTM Nodes", "Dropout", "Batch Size"])
        # Saving all HPO plots
        dvu.save_all_hpo_plots(hpo_result=hpo_result,
                               hpo_iterations_df=hpo_iterations_df,
                               path_map=self.path_to_save_hpo_plots)

        return hpo_result
Example #12
import adaptive
from skopt.utils import dump

from openmc_model import objective

# Optimisation for 1D example
learner = adaptive.SKOptLearner(
    objective,
    dimensions=[(0., 100.)],
    base_estimator="GP",
    acq_func="gp_hedge",
    acq_optimizer="lbfgs",
)
runner = adaptive.Runner(learner, ntasks=1, goal=lambda l: l.npoints > 40)

runner.ioloop.run_until_complete(runner.task)

dump(runner.learner, 'saved_optimisation_1d.dat')
Example #13
import os
import sys

from skopt import Optimizer
from skopt import utils as skopt_utils

# model_filename, app_name, read_container_info, ask_BO, bo_function and
# read_measured_data are assumed to be defined elsewhere in this script

#1. First load an existing optimization model, or build a new one
# when the model is used for the first time
opt = None
if os.path.exists(model_filename):
    opt = skopt_utils.load(model_filename)
    print("read model")
else:
    # default setting: there are three workers for each application;
    # the CPU range is from 200m to 500m
    restriction = []
    for i in range(len(app_name)):
        for j in range(3):
            restriction += [(100, 3950)]
    opt = Optimizer(restriction, n_initial_points=5, acq_func="gp_hedge", base_estimator="GP")
    app_info, keys = read_container_info()    
    ask_BO([], keys) 
    skopt_utils.dump(opt, model_filename)
    sys.exit()

#3. Read input data from file
app_info, keys = read_container_info()
cpu = []
measured = read_measured_data()

y, stop = bo_function(app_info)
if not stop:
    opt.tell(measured, y)
    print(y, stop)
    print("-----")
    ask_BO(measured, keys)
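Presumably the updated optimizer state should be persisted again after each `tell`, mirroring the first-run branch above; a minimal sketch:

# Save the optimizer after incorporating the new observation so the
# next invocation of this script resumes from it
skopt_utils.dump(opt, model_filename)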