def test_probability_of_improvement(random_state, input, expected):
    opt = Optimizer(
        dimensions=[(-2.0, 2.0)], n_initial_points=0, random_state=random_state
    )
    opt.tell(
        [[-2.0], [-1.0], [0.0], [1.0], [2.0]],
        [2.0, 0.0, -2.0, 0.0, 2.0],
        gp_burnin=10,
    )
    prob = opt.probability_of_optimality(
        threshold=input["threshold"],
        n_random_starts=20,
        random_state=random_state,
        normalized_scores=input["normalized_scores"],
    )
    np.testing.assert_almost_equal(prob, expected, decimal=2)
def test_noise_vector():
    opt = Optimizer(dimensions=[(-2.0, 2.0)], n_initial_points=5)
    opt.tell(
        [[-2.0], [-1.0], [0.0], [1.0], [2.0]],
        [0.0, -1.0, 0.0, -1.0, 0.0],
        noise_vector=[1.0, 1.0, 1.0, 0.0, 1.0],
    )
    # Test that the less noisy optimum (at 1.0) had a stronger impact on the mean
    # process than the noisy optimum (at -1.0):
    y_noisy, y = opt.gp.predict([[-1.0], [1.0]])
    assert y_noisy > y

    # Check that passing a single point works correctly:
    x = opt.ask()
    opt.tell(x, 0.0, noise_vector=0.5)
def test_expected_optimality_gap(random_state, input, expected):
    opt = Optimizer(
        dimensions=[(-2.0, 2.0)], n_initial_points=0, random_state=random_state
    )
    opt.tell(
        [[-2.0], [-1.0], [0.0], [1.0], [2.0]],
        [2.0, 0.0, -2.0, 0.0, 2.0],
        gp_burnin=10,
    )
    gap = opt.expected_optimality_gap(
        random_state=random_state,
        n_probabilities=10,
        n_space_samples=100,
        n_gp_samples=100,
        n_random_starts=10,
        tol=0.1,
        use_mean_gp=input["use_mean_gp"],
        normalized_scores=input["normalized_scores"],
    )
    np.testing.assert_almost_equal(gap, expected, decimal=2)
def test_optimum_intervals():
    opt = Optimizer(
        dimensions=[(0.0, 1.0)], random_state=0, acq_func="mean", n_points=100
    )
    x = np.linspace(0, 1, num=20)[:, None]
    y = np.cos(np.pi * 4 * x).flatten() + opt.rng.randn(20) * 0.1
    opt.tell(x.tolist(), y.tolist(), gp_burnin=20, progress=False, n_samples=1)

    # Multimodal search on a 1-d space: expect at least two (lower, upper)
    # intervals for the single dimension:
    intervals = opt.optimum_intervals(random_state=0, space_samples=100)
    assert len(intervals) == 1
    assert len(intervals[0]) >= 2
    assert len(intervals[0][0]) == 2

    # Unimodal search: expect exactly one (lower, upper) interval per dimension:
    intervals = opt.optimum_intervals(
        random_state=0, space_samples=100, multimodal=False
    )
    assert len(intervals) == 1
    assert len(intervals[0]) == 2
def test_initial_points():
    opt = Optimizer(dimensions=[(-2.0, 2.0)], n_initial_points=5)
    x = opt.ask()
    opt.tell([x], [0.0])
    assert opt._n_initial_points == opt.n_initial_points_ - 1
    opt.tell([x], [0.0])
    assert opt._n_initial_points == opt.n_initial_points_ - 2
    opt.tell([[0.1], [0.2], [0.3]], [0.0, 0.1, 0.2], replace=True)
    assert opt._n_initial_points == opt.n_initial_points_ - 3
def test_initial_points(init_strategy):
    opt = Optimizer(
        dimensions=[(-2.0, 2.0)], n_initial_points=3, init_strategy=init_strategy
    )
    x = opt.ask()
    assert not isinstance(x[0], list)
    opt.tell([x], [0.0])
    assert opt._n_initial_points == opt.n_initial_points_ - 1
    opt.tell([x], [0.0])
    assert opt._n_initial_points == opt.n_initial_points_ - 2
    assert opt.gp.chain_ is None
    opt.tell([[0.1], [0.2], [0.3]], [0.0, 0.1, 0.2], replace=True)
    assert opt._n_initial_points == opt.n_initial_points_ - 3
    assert opt.gp.chain_ is not None
def local(  # noqa: C901
    tuning_config,
    acq_function="mes",
    acq_function_samples=1,
    confidence=0.9,
    data_path=None,
    gp_burnin=5,
    gp_samples=300,
    gp_initial_burnin=100,
    gp_initial_samples=300,
    logfile="log.txt",
    n_initial_points=30,
    n_points=500,
    plot_every=5,
    plot_path="plots",
    random_seed=0,
    result_every=5,
    resume=True,
    verbose=False,
):
    """Run a local tune.

    Parameters defined in the `tuning_config` file always take precedence.
    """
    json_dict = json.load(tuning_config)
    settings, commands, fixed_params, param_ranges = load_tuning_config(json_dict)

    # Set up logging to both a logfile and the console:
    log_level = logging.DEBUG if verbose else logging.INFO
    log_format = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
    root_logger = logging.getLogger()
    root_logger.setLevel(log_level)
    file_logger = logging.FileHandler(settings.get("logfile", logfile))
    file_logger.setFormatter(log_format)
    root_logger.addHandler(file_logger)
    console_logger = logging.StreamHandler(sys.stdout)
    console_logger.setFormatter(log_format)
    root_logger.addHandler(console_logger)
    logging.debug(f"Got the following tuning settings:\n{json_dict}")

    # 1. Create seed sequence
    ss = np.random.SeedSequence(settings.get("random_seed", random_seed))

    # 2. Create kernel

    # 3. Create optimizer
    random_state = np.random.RandomState(np.random.MT19937(ss.spawn(1)[0]))
    opt = Optimizer(
        dimensions=list(param_ranges.values()),
        n_points=settings.get("n_points", n_points),
        n_initial_points=settings.get("n_initial_points", n_initial_points),
        # gp_kernel=kernel,  # TODO: Let user pass in different kernels
        gp_kwargs=dict(normalize_y=True),
        # gp_priors=priors,  # TODO: Let user pass in priors
        acq_func=settings.get("acq_function", acq_function),
        acq_func_kwargs=dict(alpha="inf", n_thompson=20),
        random_state=random_state,
    )
    X = []
    y = []
    noise = []
    iteration = 0

    # 3.1 Resume from existing data:
    if data_path is None:
        data_path = "data.npz"
    if resume:
        path = pathlib.Path(data_path)
        if path.exists():
            with np.load(path) as importa:
                X = importa["arr_0"].tolist()
                y = importa["arr_1"].tolist()
                noise = importa["arr_2"].tolist()
            if len(X[0]) != opt.space.n_dims:
                logging.error(
                    "The number of parameters does not match the number of "
                    "dimensions. Rename the existing data file or ensure that the "
                    "parameter ranges are correct."
                )
                sys.exit(1)
            reduction_needed, X_reduced, y_reduced, noise_reduced = reduce_ranges(
                X, y, noise, opt.space
            )
            if reduction_needed:
                backup_path = path.parent / (
                    path.stem + f"_backup_{int(time.time())}" + path.suffix
                )
                logging.warning(
                    f"The parameter ranges are smaller than the existing data. "
                    f"Some points will have to be discarded. "
                    f"The original {len(X)} data points will be saved to "
                    f"{backup_path}"
                )
                np.savez_compressed(
                    backup_path, np.array(X), np.array(y), np.array(noise)
                )
                X = X_reduced
                y = y_reduced
                noise = noise_reduced

            iteration = len(X)
            logging.info(
                f"Importing {iteration} existing datapoints. "
                f"This could take a while..."
            )
            opt.tell(
                X,
                y,
                noise_vector=noise,
                gp_burnin=settings.get("gp_initial_burnin", gp_initial_burnin),
                gp_samples=settings.get("gp_initial_samples", gp_initial_samples),
                n_samples=settings.get("n_samples", 1),
                progress=True,
            )
            logging.info("Importing finished.")

    # 4. Main optimization loop:
    while True:
        logging.info("Starting iteration {}".format(iteration))

        # Periodically report the current optimum and its confidence intervals:
        result_every_n = settings.get("result_every", result_every)
        if (
            result_every_n > 0
            and iteration % result_every_n == 0
            and opt.gp.chain_ is not None
        ):
            result_object = create_result(Xi=X, yi=y, space=opt.space, models=[opt.gp])
            try:
                best_point, best_value = expected_ucb(result_object, alpha=0.0)
                best_point_dict = dict(zip(param_ranges.keys(), best_point))
                logging.info(f"Current optimum:\n{best_point_dict}")
                logging.info(f"Estimated value: {best_value}")
                confidence_val = settings.get("confidence", confidence)
                confidence_out = confidence_intervals(
                    optimizer=opt,
                    param_names=list(param_ranges.keys()),
                    hdi_prob=confidence_val,
                    opt_samples=1000,
                    multimodal=False,
                )
                logging.info(
                    f"{confidence_val * 100}% confidence intervals:\n{confidence_out}"
                )
            except ValueError:
                logging.info(
                    "Computing the current optimum was not successful. "
                    "This can happen in rare cases and running the "
                    "tuner again usually works."
                )

        # Periodically plot the current model of the objective:
        plot_every_n = settings.get("plot_every", plot_every)
        if (
            plot_every_n > 0
            and iteration % plot_every_n == 0
            and opt.gp.chain_ is not None
        ):
            logging.getLogger("matplotlib.font_manager").disabled = True
            if opt.space.n_dims == 1:
                logging.warning("Plotting for only 1 parameter is not supported yet.")
            else:
                logging.debug("Starting to compute the next plot.")
                result_object = create_result(
                    Xi=X, yi=y, space=opt.space, models=[opt.gp]
                )
                plt.style.use("dark_background")
                fig, ax = plt.subplots(
                    nrows=opt.space.n_dims,
                    ncols=opt.space.n_dims,
                    figsize=(3 * opt.space.n_dims, 3 * opt.space.n_dims),
                )
                fig.patch.set_facecolor("#36393f")
                for i in range(opt.space.n_dims):
                    for j in range(opt.space.n_dims):
                        ax[i, j].set_facecolor("#36393f")
                timestr = time.strftime("%Y%m%d-%H%M%S")
                plot_objective(
                    result_object, dimensions=list(param_ranges.keys()), fig=fig, ax=ax
                )
                plotpath = pathlib.Path(settings.get("plot_path", plot_path))
                plotpath.mkdir(parents=True, exist_ok=True)
                full_plotpath = plotpath / f"{timestr}-{iteration}.png"
                plt.savefig(
                    full_plotpath,
                    pad_inches=0.1,
                    dpi=300,
                    bbox_inches="tight",
                    facecolor="#36393f",
                )
                logging.info(f"Saving a plot to {full_plotpath}.")
                plt.close(fig)

        # Ask the optimizer for the next point and run the experiment:
        point = opt.ask()
        point_dict = dict(zip(param_ranges.keys(), point))
        logging.info("Testing {}".format(point_dict))
        engine_json = prepare_engines_json(commands=commands, fixed_params=fixed_params)
        logging.debug(f"engines.json is prepared:\n{engine_json}")
        write_engines_json(engine_json, point_dict)
        logging.info("Start experiment")
        now = datetime.now()
        out_exp, out_exp_err = run_match(**settings)
        later = datetime.now()
        difference = (later - now).total_seconds()
        logging.info(f"Experiment finished ({difference}s elapsed).")
        logging.debug(f"Raw result:\n{out_exp}\n{out_exp_err}")
        score, error = parse_experiment_result(out_exp, **settings)
        logging.info("Got score: {} +- {}".format(score, error))

        logging.info("Updating model")
        while True:
            try:
                now = datetime.now()
                # We fetch kwargs manually here to avoid collisions:
                n_samples = settings.get("acq_function_samples", acq_function_samples)
                if opt.gp.chain_ is None:
                    # The GP has not been sampled yet, so use the (longer)
                    # initial burn-in and sample counts:
                    current_burnin = settings.get(
                        "gp_initial_burnin", gp_initial_burnin
                    )
                    current_samples = settings.get(
                        "gp_initial_samples", gp_initial_samples
                    )
                else:
                    current_burnin = settings.get("gp_burnin", gp_burnin)
                    current_samples = settings.get("gp_samples", gp_samples)
                opt.tell(
                    point,
                    score,
                    n_samples=n_samples,
                    gp_samples=current_samples,
                    gp_burnin=current_burnin,
                )
                later = datetime.now()
                difference = (later - now).total_seconds()
                logging.info(f"GP sampling finished ({difference}s)")
                logging.debug(f"GP kernel: {opt.gp.kernel_}")
            except ValueError:
                logging.warning(
                    "Error encountered during fitting. Trying to sample the chain "
                    "a bit. If this problem persists, restart the tuner to "
                    "reinitialize."
                )
                opt.gp.sample(n_burnin=5, priors=opt.gp_priors)
            else:
                break

        # Record the new observation and persist all data atomically:
        X.append(point)
        y.append(score)
        noise.append(error)
        iteration = len(X)

        with AtomicWriter(data_path, mode="wb", overwrite=True).open() as f:
            np.savez_compressed(f, np.array(X), np.array(y), np.array(noise))
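
# A hypothetical, minimal ``tuning_config`` for ``local`` above, shown as the JSON
# document that ``json.load`` would parse. The settings keys mirror the
# ``settings.get(...)`` lookups in the function; the layout of the engine commands,
# fixed parameters and parameter ranges is determined by ``load_tuning_config``
# (not shown here) and is only sketched as an assumption.
#
# {
#     "engines": [...],              # assumed: engine commands and fixed parameters
#     "parameter_ranges": {...},     # assumed: maps parameter names to their ranges
#     "acq_function": "mes",
#     "acq_function_samples": 1,
#     "confidence": 0.9,
#     "gp_burnin": 5,
#     "gp_samples": 300,
#     "gp_initial_burnin": 100,
#     "gp_initial_samples": 300,
#     "n_initial_points": 30,
#     "plot_every": 5,
#     "plot_path": "plots",
#     "random_seed": 0,
#     "result_every": 5
# }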
def test_error_on_invalid_priors():
    opt = Optimizer(dimensions=[(-2.0, 2.0)], gp_priors=[], n_initial_points=0)
    with pytest.raises(ValueError):
        opt.tell([(0.0,)], 0.0)
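
# Hypothetical sketch (not part of the test suite): ``gp_priors`` is assumed to
# take one callable per kernel hyperparameter, e.g. signal variance, one length
# scale per dimension and the noise level, each returning the log prior density
# of the log-transformed hyperparameter (as in scikit-learn's kernel ``theta``).
# Under that assumption the empty list above cannot match the kernel, which is
# presumably why ``tell`` raises a ``ValueError``. The exact layout and
# parametrization depend on the kernel actually used.
def _example_gp_priors(n_dims):
    from scipy.stats import halfnorm, invgamma

    def variance_prior(x):
        # Half-normal prior on the standard deviation, evaluated at x = log(variance),
        # including the Jacobian of the change of variables:
        return (
            halfnorm(scale=2.0).logpdf(np.sqrt(np.exp(x))) + x / 2.0 - np.log(2.0)
        )

    def lengthscale_prior(x):
        # Inverse-gamma prior on the length scale, evaluated at x = log(length scale):
        return invgamma(a=5.0, scale=1.0).logpdf(np.exp(x)) + x

    # One prior for the signal variance, one per length scale, one for the noise:
    return [variance_prior] + [lengthscale_prior] * n_dims + [variance_prior]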