def test_mk_observer_unlabelled() -> None:
    """An observer built from a plain function wraps its output in an unlabelled dataset."""

    def add_one(t: tf.Tensor) -> tf.Tensor:
        return t + 1

    query = tf.constant([[3.0]])
    result = mk_observer(add_one)(query)

    npt.assert_array_equal(result.query_points, query)
    npt.assert_array_equal(result.observations, query + 1)
def test_mk_observer() -> None:
    """An observer built with a label returns a mapping with exactly that key."""

    def add_one(t: tf.Tensor) -> tf.Tensor:
        return t + 1

    query = tf.constant([[3.0]])
    labelled = mk_observer(add_one, "bar")(query)

    assert labelled.keys() == {"bar"}
    npt.assert_array_equal(labelled["bar"].query_points, query)
    npt.assert_array_equal(labelled["bar"].observations, query + 1)
def branin_dataset(num_query_points: int) -> Dataset:
    """
    Generate an example dataset based on the Branin objective function.

    Note: the docstring previously claimed Hartmann 6, but the code observes ``branin``
    over the two-dimensional unit square.

    :param num_query_points: The number of samples from the objective function.
    :return: A dataset of uniformly-sampled query points and their Branin observations.
    """
    search_space = Box([0, 0], [1, 1])
    query_points = search_space.sample(num_query_points)
    observer = mk_observer(branin, OBJECTIVE)
    data = observer(query_points)
    return data[OBJECTIVE]
def test_optimizer_finds_minima_of_the_scaled_branin_function(
    num_steps: int,
    acquisition_rule: AcquisitionRule[TensorType, SearchSpace]
    | AcquisitionRule[State[TensorType, AsynchronousGreedy.State | TrustRegion.State], Box],
) -> None:
    """
    End-to-end regression test: run ``num_steps`` of Bayesian optimization with the given
    acquisition rule on the scaled Branin function and check the best point found is close
    to a known minimizer and minimum.
    """
    search_space = BRANIN_SEARCH_SPACE

    def build_model(data: Dataset) -> GaussianProcessRegression:
        # GPR with a Matern 5/2 kernel; log-normal priors on the kernel parameters
        # stabilize model fitting. Likelihood variance is fixed (noise-free observer).
        variance = tf.math.reduce_variance(data.observations)
        kernel = gpflow.kernels.Matern52(variance, tf.constant([0.2, 0.2], tf.float64))
        scale = tf.constant(1.0, dtype=tf.float64)
        kernel.variance.prior = tfp.distributions.LogNormal(
            tf.constant(-2.0, dtype=tf.float64), scale
        )
        kernel.lengthscales.prior = tfp.distributions.LogNormal(
            tf.math.log(kernel.lengthscales), scale
        )
        gpr = gpflow.models.GPR((data.query_points, data.observations), kernel, noise_variance=1e-5)
        gpflow.utilities.set_trainable(gpr.likelihood, False)
        return GaussianProcessRegression(gpr)

    initial_query_points = search_space.sample(5)
    observer = mk_observer(scaled_branin)
    initial_data = observer(initial_query_points)
    model = build_model(initial_data)

    dataset = (
        BayesianOptimizer(observer, search_space)
        .optimize(num_steps, initial_data, model, acquisition_rule)
        .try_get_final_dataset()
    )

    # Locate the best (lowest) observation collected over the whole run.
    arg_min_idx = tf.squeeze(tf.argmin(dataset.observations, axis=0))
    best_y = dataset.observations[arg_min_idx]
    best_x = dataset.query_points[arg_min_idx]

    # Branin has several global minimizers; accept a match against any of them.
    relative_minimizer_err = tf.abs((best_x - BRANIN_MINIMIZERS) / BRANIN_MINIMIZERS)
    # these accuracies are the current best for the given number of optimization steps, which makes
    # this a regression test
    assert tf.reduce_any(tf.reduce_all(relative_minimizer_err < 0.05, axis=-1), axis=0)
    npt.assert_allclose(best_y, SCALED_BRANIN_MINIMUM, rtol=0.005)

    # check that acquisition functions defined as classes aren't being retraced unnecessarily
    # NOTE(review): the expected trace count of 3 presumably reflects the current
    # optimizer implementation — confirm if this assertion starts failing.
    if isinstance(acquisition_rule, EfficientGlobalOptimization):
        acquisition_function = acquisition_rule._acquisition_function
        if isinstance(acquisition_function, AcquisitionFunctionClass):
            assert acquisition_function.__call__._get_tracing_count() == 3  # type: ignore
def test_two_layer_dgp_optimizer_finds_minima_of_michalewicz_function(
    num_steps: int, acquisition_rule: AcquisitionRule[TensorType, SearchSpace], keras_float: None
) -> None:
    """
    End-to-end test: optimize the Michalewicz function with a two-layer deep Gaussian
    process model and check the best point found is close to the known minimizer/minimum.
    """
    # this unit test fails sometimes for
    # normal search space used with MICHALEWICZ function
    # so for stability we reduce its size here
    search_space = Box(MICHALEWICZ_2_MINIMIZER[0] - 0.5, MICHALEWICZ_2_MINIMIZER[0] + 0.5)

    def build_model(data: Dataset) -> DeepGaussianProcess:
        # Train for 2000 epochs with Adam; the learning rate is dropped by 10x
        # halfway through via a Keras scheduler callback.
        epochs = int(2e3)
        batch_size = 100

        dgp = two_layer_dgp_model(data.query_points)

        def scheduler(epoch: int, lr: float) -> float:
            if epoch == epochs // 2:
                return lr * 0.1
            else:
                return lr

        optimizer = tf.optimizers.Adam(0.01)
        fit_args = {
            "batch_size": batch_size,
            "epochs": epochs,
            "verbose": 0,
            "callbacks": tf.keras.callbacks.LearningRateScheduler(scheduler),
        }

        return DeepGaussianProcess(model=dgp, optimizer=optimizer, fit_args=fit_args)

    initial_query_points = search_space.sample(50)
    observer = mk_observer(michalewicz, OBJECTIVE)
    initial_data = observer(initial_query_points)
    model = build_model(initial_data[OBJECTIVE])

    # track_state=False avoids copying the (expensive) DGP model at every step.
    dataset = (
        BayesianOptimizer(observer, search_space)
        .optimize(num_steps, initial_data, {OBJECTIVE: model}, acquisition_rule, track_state=False)
        .try_get_final_dataset()
    )

    # Locate the best (lowest) observation collected over the whole run.
    arg_min_idx = tf.squeeze(tf.argmin(dataset.observations, axis=0))
    best_y = dataset.observations[arg_min_idx]
    best_x = dataset.query_points[arg_min_idx]

    relative_minimizer_err = tf.abs((best_x - MICHALEWICZ_2_MINIMIZER) / MICHALEWICZ_2_MINIMIZER)
    assert tf.reduce_all(relative_minimizer_err < 0.03, axis=-1)
    npt.assert_allclose(best_y, MICHALEWICZ_2_MINIMUM, rtol=0.03)
def test_multi_objective_optimizer_finds_pareto_front_of_the_VLMOP2_function(
    num_steps: int, acquisition_rule: AcquisitionRule[TensorType, Box], convergence_threshold: float
) -> None:
    """
    End-to-end multi-objective test: optimize the two-objective VLMOP2 function and check
    the hypervolume of the observed Pareto front approaches that of the ideal front.
    """
    search_space = Box([-2, -2], [2, 2])

    def build_stacked_independent_objectives_model(data: Dataset) -> ModelStack:
        # One independent single-output GPR per objective, stacked into a single model.
        gprs = []

        for idx in range(2):
            # Slice out the idx-th objective column (kept 2D via gather).
            single_obj_data = Dataset(
                data.query_points, tf.gather(data.observations, [idx], axis=1)
            )
            variance = tf.math.reduce_variance(single_obj_data.observations)
            kernel = gpflow.kernels.Matern52(variance, tf.constant([0.2, 0.2], tf.float64))
            gpr = gpflow.models.GPR(single_obj_data.astuple(), kernel, noise_variance=1e-5)
            gpflow.utilities.set_trainable(gpr.likelihood, False)
            gprs.append((GaussianProcessRegression(gpr), 1))

        return ModelStack(*gprs)

    observer = mk_observer(VLMOP2().objective(), OBJECTIVE)

    initial_query_points = search_space.sample(10)
    initial_data = observer(initial_query_points)

    model = build_stacked_independent_objectives_model(initial_data[OBJECTIVE])

    dataset = (
        BayesianOptimizer(observer, search_space)
        .optimize(num_steps, initial_data, {OBJECTIVE: model}, acquisition_rule)
        .try_get_final_datasets()[OBJECTIVE]
    )

    # A small log hypervolume difference corresponds to a successful optimization.
    ref_point = get_reference_point(dataset.observations)

    obs_hv = Pareto(dataset.observations).hypervolume_indicator(ref_point)

    # Compare against the hypervolume of an ideal (analytically generated) Pareto front.
    ideal_pf = tf.cast(VLMOP2().gen_pareto_optimal_points(100), dtype=tf.float64)
    ideal_hv = Pareto(ideal_pf).hypervolume_indicator(ref_point)

    assert tf.math.log(ideal_hv - obs_hv) < convergence_threshold
# As an example, we will be searching for a minimum of a 10-dimensional [Trid function](https://www.sfu.ca/~ssurjano/trid.html). The range of variation of the Trid function values is large. It varies from values of $10^5$ to its global minimum $f(x^∗) = −210$. This large variation range makes it difficult for Bayesian optimization with Gaussian processes to find the global minimum. However, with data normalisation it becomes possible (see <cite data-cite="hebbal2019bayesian">[Hebbal et al. 2019](https://arxiv.org/abs/1905.03350)</cite>). # %% function = trid_10 F_MINIMUM = TRID_10_MINIMUM search_space = TRID_10_SEARCH_SPACE # %% [markdown] # ## Collect initial points # # We set up the observer as usual over the Trid function search space, using Sobol sampling to sample the initial points. # %% num_initial_points = 50 observer = mk_observer(function) initial_query_points = search_space.sample_sobol(num_initial_points) initial_data = observer(initial_query_points) # %% [markdown] # ## Model the objective function # # The Bayesian optimization procedure estimates the next best points to query by using a probabilistic model of the objective. We'll use a Gaussian process (GP) model, built using GPflow. The model will need to be trained on each step as more points are evaluated, so we'll package it with GPflow's Scipy optimizer. # # Here as the first example, we model the objective function using the original data, without performing any data transformation. In the next example we will model it using normalised data. # # We also put priors on the parameters of our GP model's kernel in order to stabilize model fitting. We found the priors below to be highly effective for objective functions defined over the unit hypercube and with an output normalised to have zero mean and unit variance. Since the non-normalised data from the original objective function comes with different scaling, we rescale the priors based on approximate standard deviation of inputs and outputs. # %%
def build_model(data, kernel_func=None):
    """
    Build a GPR-based model of the objective from the given dataset.

    :param data: Initial dataset of query points and observations.
    :param kernel_func: kernel_func should be a function that takes variance as a single
        input parameter; defaults to a Matern 5/2 kernel when not given.
    :return: A trieste ``GaussianProcessRegression`` wrapping the GPflow model.
    """
    variance = tf.math.reduce_variance(data.observations)
    if kernel_func is None:
        kernel = gpflow.kernels.Matern52(variance=variance)
    else:
        kernel = kernel_func(variance)
    # Likelihood variance fixed at a small value: observations are noise-free.
    gpr = gpflow.models.GPR(data.astuple(), kernel, noise_variance=1e-5)
    gpflow.set_trainable(gpr.likelihood, False)
    return GaussianProcessRegression(gpr)


num_initial_points = 5
initial_query_points = search_space.sample(num_initial_points)
observer = mk_observer(scaled_branin)
initial_data = observer(initial_query_points)

# %% [markdown]
# ## Timing acquisition function: simple use case for Ask-Tell
#
# Let's say we are very concerned with the performance of the acquisition function, and want a simple way of measuring its performance over the course of the optimization. At the time of writing these lines, regular Trieste's optimizer does not provide such customization functionality, and this is where Ask-Tell comes in handy.

# %%
import timeit

model = build_model(initial_data)
ask_tell = AskTellOptimizer(search_space, initial_data, model)

for step in range(n_steps):
    start = timeit.default_timer()
def test_ask_tell_optimization_finds_minima_of_the_scaled_branin_function(
    num_steps: int,
    reload_state: bool,
    acquisition_rule_fn: Callable[[], AcquisitionRule[TensorType, SearchSpace]]
    | Callable[
        [],
        AcquisitionRule[State[TensorType, AsynchronousGreedy.State | TrustRegion.State], Box],
    ],
) -> None:
    """
    End-to-end regression test for the Ask-Tell interface on the scaled Branin function,
    optionally pickling and restoring the optimizer state at every step.
    """
    # For the case when optimization state is saved and reloaded on each iteration
    # we need to use a new acquisition function object to imitate real life usage
    # hence acquisition rule factory method is passed in, instead of a rule object itself
    # it is then called to create a new rule whenever needed in the test
    search_space = BRANIN_SEARCH_SPACE

    def build_model(data: Dataset) -> GaussianProcessRegression:
        # GPR with a Matern 5/2 kernel; log-normal priors on the kernel parameters
        # stabilize model fitting. Likelihood variance is fixed (noise-free observer).
        variance = tf.math.reduce_variance(data.observations)
        kernel = gpflow.kernels.Matern52(variance, tf.constant([0.2, 0.2], tf.float64))
        scale = tf.constant(1.0, dtype=tf.float64)
        kernel.variance.prior = tfp.distributions.LogNormal(
            tf.constant(-2.0, dtype=tf.float64), scale
        )
        kernel.lengthscales.prior = tfp.distributions.LogNormal(
            tf.math.log(kernel.lengthscales), scale
        )
        gpr = gpflow.models.GPR((data.query_points, data.observations), kernel, noise_variance=1e-5)
        gpflow.utilities.set_trainable(gpr.likelihood, False)
        return GaussianProcessRegression(gpr)

    initial_query_points = search_space.sample(5)
    observer = mk_observer(scaled_branin)
    initial_data = observer(initial_query_points)
    model = build_model(initial_data)

    ask_tell = AskTellOptimizer(search_space, initial_data, model, acquisition_rule_fn())

    for _ in range(num_steps):
        # two scenarios are tested here, depending on `reload_state` parameter
        # in first the same optimizer object is always used
        # in second new optimizer is created at each step from saved state
        new_point = ask_tell.ask()

        if reload_state:
            state: Record[
                None | State[TensorType, AsynchronousGreedy.State | TrustRegion.State]
            ] = ask_tell.to_record()
            written_state = pickle.dumps(state)

        new_data_point = observer(new_point)

        if reload_state:
            state = pickle.loads(written_state)
            ask_tell = AskTellOptimizer.from_record(state, search_space, acquisition_rule_fn())

        ask_tell.tell(new_data_point)

    result: OptimizationResult[
        None | State[TensorType, AsynchronousGreedy.State | TrustRegion.State]
    ] = ask_tell.to_result()
    dataset = result.try_get_final_dataset()

    # Locate the best (lowest) observation collected over the whole run.
    arg_min_idx = tf.squeeze(tf.argmin(dataset.observations, axis=0))
    best_y = dataset.observations[arg_min_idx]
    best_x = dataset.query_points[arg_min_idx]

    # Branin has several global minimizers; accept a match against any of them.
    relative_minimizer_err = tf.abs((best_x - BRANIN_MINIMIZERS) / BRANIN_MINIMIZERS)
    # these accuracies are the current best for the given number of optimization steps, which makes
    # this a regression test
    assert tf.reduce_any(tf.reduce_all(relative_minimizer_err < 0.05, axis=-1), axis=0)
    npt.assert_allclose(best_y, SCALED_BRANIN_MINIMUM, rtol=0.005)