def test_bayesian_optimizer_on_simple_2d_quadratic_function_cold_start(self):
    """Tests the bayesian optimizer on a simple quadratic function with no prior data.

    :return:
    """
    # Two continuous inputs in [-100, 100] and an unbounded objective 'y'.
    input_space = SimpleHypergrid(
        name="input",
        dimensions=[
            ContinuousDimension(name='x_1', min=-100, max=100),
            ContinuousDimension(name='x_2', min=-100, max=100),
        ]
    )
    output_space = SimpleHypergrid(
        name="output",
        dimensions=[ContinuousDimension(name='y', min=-math.inf, max=math.inf)]
    )

    optimization_problem = OptimizationProblem(
        parameter_space=input_space,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)]
    )
    bayesian_optimizer = BayesianOptimizer(
        optimization_problem=optimization_problem,
        optimizer_config=BayesianOptimizerConfig.DEFAULT,
        logger=self.logger
    )

    num_guided_samples = 1000
    for i in range(num_guided_samples):
        suggested_params = bayesian_optimizer.suggest()
        params_as_dict = suggested_params.to_dict()
        target_value = quadratic(**params_as_dict)
        self.logger.info(f"[{i}/{num_guided_samples}] suggested params: {suggested_params}, target: {target_value}")

        # Feed the observation back as single-row dataframes.
        features_df = pd.DataFrame({name: [value] for name, value in params_as_dict.items()})
        objectives_df = pd.DataFrame({'y': [target_value]})
        bayesian_optimizer.register(features_df, objectives_df)

        # Periodically report the incumbent optimum once past the warm-up.
        if i > 20 and i % 20 == 0:
            self.logger.info(f"[{i}/{num_guided_samples}] Optimum: {bayesian_optimizer.optimum()}")

    self.logger.info(f"Optimum: {bayesian_optimizer.optimum()}")
def test_bayesian_optimizer_on_simple_2d_quadratic_function_pre_heated(self):
    """Tests the bayesian optimizer on a simple quadratic function first feeding the optimizer a lot of data.

    """
    # Build the objective function from its named store config.
    function_config = objective_function_config_store.get_config_by_name('2d_quadratic_concave_up')
    objective_function = ObjectiveFunctionFactory.create_objective_function(function_config)

    # Pre-heat: evaluate a large random sample of the parameter space up front.
    preheat_params_df = objective_function.parameter_space.random_dataframe(num_samples=10000)
    preheat_objectives_df = objective_function.evaluate_dataframe(preheat_params_df)

    optimization_problem = OptimizationProblem(
        parameter_space=objective_function.parameter_space,
        objective_space=objective_function.output_space,
        objectives=[Objective(name='y', minimize=True)]
    )
    bayesian_optimizer = BayesianOptimizer(
        optimization_problem=optimization_problem,
        optimizer_config=bayesian_optimizer_config_store.default,
        logger=self.logger
    )
    bayesian_optimizer.register(preheat_params_df, preheat_objectives_df)

    num_guided_samples = 20
    for i in range(num_guided_samples):
        # Suggest the parameters
        suggested_params = bayesian_optimizer.suggest()
        target_value = objective_function.evaluate_point(suggested_params)
        self.logger.info(f"[{i}/{num_guided_samples}] suggested params: {suggested_params}, target: {target_value}")

        # Register the observation with the optimizer
        bayesian_optimizer.register(suggested_params.to_dataframe(), target_value.to_dataframe())

    self.validate_optima(bayesian_optimizer)
    best_config_point, best_objective = bayesian_optimizer.optimum()
    self.logger.info(f"Optimum: {best_objective} Best Configuration: {best_config_point}")

    # Dump the collected trace events to a file, then reset the tracer.
    trace_output_path = os.path.join(self.temp_dir, "PreHeatedTrace.json")
    self.logger.info(f"Writing trace to {trace_output_path}")
    global_values.tracer.dump_trace_to_file(output_file_path=trace_output_path)
    global_values.tracer.clear_events()
def test_bayesian_optimizer_on_simple_2d_quadratic_function_cold_start(self):
    """Tests the bayesian optimizer on a simple quadratic function with no prior data.

    """
    function_config = objective_function_config_store.get_config_by_name('2d_quadratic_concave_up')
    objective_function = ObjectiveFunctionFactory.create_objective_function(function_config)

    optimization_problem = OptimizationProblem(
        parameter_space=objective_function.parameter_space,
        objective_space=objective_function.output_space,
        objectives=[Objective(name='y', minimize=True)]
    )
    bayesian_optimizer = BayesianOptimizer(
        optimization_problem=optimization_problem,
        optimizer_config=bayesian_optimizer_config_store.default,
        logger=self.logger
    )

    num_guided_samples = 1000
    for i in range(num_guided_samples):
        suggested_params = bayesian_optimizer.suggest()
        target_value = objective_function.evaluate_point(suggested_params)
        self.logger.info(f"[{i}/{num_guided_samples}] suggested params: {suggested_params}, target: {target_value}")

        bayesian_optimizer.register(suggested_params.to_dataframe(), target_value.to_dataframe())

        # Log the incumbent optimum every 20 iterations once past warm-up.
        if i > 20 and i % 20 == 0:
            best_config_point, best_objective = bayesian_optimizer.optimum()
            self.logger.info(f"[{i}/{num_guided_samples}] Optimum config: {best_config_point}, optimum objective: {best_objective}")

    self.validate_optima(bayesian_optimizer)

    best_config, optimum = bayesian_optimizer.optimum()
    # The optimum must lie inside the declared spaces, and its objective value
    # must equal the best observation registered so far.
    assert objective_function.parameter_space.contains_point(best_config)
    assert objective_function.output_space.contains_point(optimum)
    _, all_targets = bayesian_optimizer.get_all_observations()
    assert optimum.y == all_targets.min()[0]
    self.logger.info(f"Optimum: {optimum} best configuration: {best_config}")
def test_hierarchical_quadratic_cold_start(self):
    """Repeatedly cold-starts the optimizer on a three-level hierarchical quadratic."""
    function_config = objective_function_config_store.get_config_by_name('three_level_quadratic')
    objective_function = ObjectiveFunctionFactory.create_objective_function(
        objective_function_config=function_config
    )

    # Single unbounded continuous objective 'y'.
    output_space = SimpleHypergrid(
        name="output",
        dimensions=[ContinuousDimension(name='y', min=-math.inf, max=math.inf)]
    )
    optimization_problem = OptimizationProblem(
        parameter_space=objective_function.parameter_space,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)]
    )

    num_restarts = 1000
    for restart_num in range(num_restarts):
        # A fresh optimizer per restart — each run starts with no observations.
        bayesian_optimizer = BayesianOptimizer(
            optimization_problem=optimization_problem,
            optimizer_config=bayesian_optimizer_config_store.default,
            logger=self.logger
        )

        num_guided_samples = 200
        for i in range(num_guided_samples):
            suggested_params = bayesian_optimizer.suggest()
            y = objective_function.evaluate_point(suggested_params)
            self.logger.info(f"[{i}/{num_guided_samples}] {suggested_params}, y: {y}")

            bayesian_optimizer.register(suggested_params.to_dataframe(), y.to_dataframe())

        self.validate_optima(bayesian_optimizer)
        best_config_point, best_objective = bayesian_optimizer.optimum()
        self.logger.info(f"[{restart_num}/{num_restarts}] Optimum config: {best_config_point}, optimum objective: {best_objective}")
def test_hierarchical_quadratic_cold_start(self):
    """Repeatedly cold-starts the optimizer on the MultilevelQuadratic objective.

    Runs many independent restarts; each restart builds a fresh optimizer and
    drives it for a fixed number of guided samples, registering one observation
    per iteration.
    """
    output_space = SimpleHypergrid(
        name="output",
        dimensions=[ContinuousDimension(name='y', min=-math.inf, max=math.inf)]
    )
    optimization_problem = OptimizationProblem(
        parameter_space=MultilevelQuadratic.CONFIG_SPACE,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)]
    )
    num_restarts = 1000
    for restart_num in range(num_restarts):
        bayesian_optimizer = BayesianOptimizer(
            optimization_problem=optimization_problem,
            optimizer_config=BayesianOptimizerConfig.DEFAULT,
            logger=self.logger
        )
        num_guided_samples = 200
        for i in range(num_guided_samples):
            suggested_params = bayesian_optimizer.suggest()
            y = MultilevelQuadratic.evaluate(suggested_params)
            self.logger.info(f"[{i}/{num_guided_samples}] {suggested_params}, y: {y}")

            # BUG FIX: the original iterated `suggested_params` directly, which
            # does not yield (name, value) pairs. Convert the suggested Point to
            # a dict first, matching the pattern used by the sibling tests.
            input_values_df = pd.DataFrame({
                param_name: [param_value]
                for param_name, param_value in suggested_params.to_dict().items()
            })
            target_values_df = pd.DataFrame({'y': [y]})
            bayesian_optimizer.register(input_values_df, target_values_df)

        self.logger.info(f"[{restart_num}/{num_restarts}] Optimum: {bayesian_optimizer.optimum()}")
def test_hierarchical_quadratic_cold_start_random_configs(self):
    """Stress-tests the optimizer with many randomly drawn optimizer configurations.

    Each restart seeds the shared random state, draws a random optimizer config,
    clamps the most expensive knobs, and then runs a short guided-optimization loop.
    """
    objective_function_config = objective_function_config_store.get_config_by_name('three_level_quadratic')
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)

    # Single unbounded continuous objective 'y'.
    output_space = SimpleHypergrid(
        name="output",
        dimensions=[ContinuousDimension(name='y', min=-math.inf, max=math.inf)]
    )
    optimization_problem = OptimizationProblem(
        parameter_space=objective_function.parameter_space,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)]
    )
    random_state = random.Random()
    num_restarts = 200
    for restart_num in range(num_restarts):
        # Let's set up random seeds so that we can easily repeat failed experiments
        #
        random_state.seed(restart_num)
        bayesian_optimizer_config_store.parameter_space.random_state = random_state
        objective_function.parameter_space.random_state = random_state
        # Draw a random optimizer configuration from the config store's parameter space.
        optimizer_config = bayesian_optimizer_config_store.parameter_space.random()
        # The goal here is to make sure the optimizer works with a lot of different configurations.
        # So let's make sure each run is not too long.
        # optimizer_config.min_samples_required_for_guided_design_of_experiments = 50
        if optimizer_config.surrogate_model_implementation == HomogeneousRandomForestRegressionModel.__name__:
            # Cap the forest size and refit cadence so each restart stays cheap.
            random_forest_config = optimizer_config.homogeneous_random_forest_regression_model_config
            random_forest_config.n_estimators = min(random_forest_config.n_estimators, 5)
            decision_tree_config = random_forest_config.decision_tree_regression_model_config
            decision_tree_config.min_samples_to_fit = 10
            decision_tree_config.n_new_samples_before_refit = 10
        if optimizer_config.experiment_designer_config.numeric_optimizer_implementation == GlowWormSwarmOptimizer.__name__:
            # Likewise, shorten the glow-worm swarm run.
            optimizer_config.experiment_designer_config.glow_worm_swarm_optimizer_config.num_iterations = 5

        self.logger.info(f"[Restart: {restart_num}/{num_restarts}] Creating a BayesianOptimimizer with the following config: ")
        self.logger.info(f"Optimizer config: {optimizer_config.to_json(indent=2)}")
        bayesian_optimizer = BayesianOptimizer(
            optimization_problem=optimization_problem,
            optimizer_config=optimizer_config,
            logger=self.logger
        )
        # Run just past the random-design phase so guided suggestions are exercised.
        num_guided_samples = optimizer_config.min_samples_required_for_guided_design_of_experiments + 50
        for i in range(num_guided_samples):
            suggested_params = bayesian_optimizer.suggest()
            y = objective_function.evaluate_point(suggested_params)
            self.logger.info(f"[Restart: {restart_num}/{num_restarts}][Sample: {i}/{num_guided_samples}] {suggested_params}, y: {y}")
            input_values_df = suggested_params.to_dataframe()
            target_values_df = y.to_dataframe()
            bayesian_optimizer.register(input_values_df, target_values_df)
        best_config_point, best_objective = bayesian_optimizer.optimum()
        self.logger.info(f"[Restart: {restart_num}/{num_restarts}] Optimum config: {best_config_point}, optimum objective: {best_objective}")
def test_bayesian_optimizer_on_simple_2d_quadratic_function_pre_heated(self):
    """Tests the bayesian optimizer on a simple quadratic function first feeding the optimizer a lot of data.

    :return:
    """
    input_space = SimpleHypergrid(
        name="input",
        dimensions=[
            ContinuousDimension(name='x_1', min=-100, max=100),
            ContinuousDimension(name='x_2', min=-100, max=100),
        ]
    )
    output_space = SimpleHypergrid(
        name="output",
        dimensions=[ContinuousDimension(name='y', min=-math.inf, max=math.inf)]
    )

    # Pre-heat data: evaluate the quadratic on a dense 501x501 grid.
    x_1, x_2 = np.meshgrid(
        input_space['x_1'].linspace(num=501),
        input_space['x_2'].linspace(num=501)
    )
    y = quadratic(x_1=x_1, x_2=x_2)
    preheat_inputs_df = pd.DataFrame({'x_1': x_1.reshape(-1), 'x_2': x_2.reshape(-1)})
    preheat_outputs_df = pd.DataFrame({'y': y.reshape(-1)})

    optimization_problem = OptimizationProblem(
        parameter_space=input_space,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)]
    )
    bayesian_optimizer = BayesianOptimizer(
        optimization_problem=optimization_problem,
        optimizer_config=BayesianOptimizerConfig.DEFAULT,
        logger=self.logger
    )
    bayesian_optimizer.register(preheat_inputs_df, preheat_outputs_df)

    num_guided_samples = 20
    for i in range(num_guided_samples):
        # Suggest the parameters
        suggested_params = bayesian_optimizer.suggest()
        params_as_dict = suggested_params.to_dict()

        # Reformat them to feed the parameters to the target
        target_value = quadratic(**params_as_dict)
        self.logger.info(f"[{i}/{num_guided_samples}] suggested params: {suggested_params}, target: {target_value}")

        # Reformat the observation to feed it back to the optimizer
        features_df = pd.DataFrame({name: [value] for name, value in params_as_dict.items()})
        objectives_df = pd.DataFrame({'y': [target_value]})

        # Register the observation with the optimizer
        bayesian_optimizer.register(features_df, objectives_df)

    self.logger.info(f"Optimum: {bayesian_optimizer.optimum()}")

    # Persist the trace collected during this test, then reset the tracer.
    trace_output_path = os.path.join(self.temp_dir, "PreHeatedTrace.json")
    self.logger.info(f"Writing trace to {trace_output_path}")
    global_values.tracer.dump_trace_to_file(output_file_path=trace_output_path)
    global_values.tracer.clear_events()
def test_hierarchical_quadratic_cold_start_random_configs(self):
    """Stress-tests the optimizer with many randomly drawn optimizer configurations.

    Each restart seeds the shared random state, draws a random optimizer config,
    runs a short guided-optimization loop against MultilevelQuadratic, and records
    any failure to a report file; the test fails if any restart failed.
    """
    output_space = SimpleHypergrid(
        name="output",
        dimensions=[ContinuousDimension(name='y', min=-math.inf, max=math.inf)]
    )
    optimization_problem = OptimizationProblem(
        parameter_space=MultilevelQuadratic.CONFIG_SPACE,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)]
    )
    random_state = random.Random()
    num_restarts = 100
    has_failed = False
    for restart_num in range(num_restarts):
        try:
            # Let's set up random seeds so that we can easily repeat failed experiments
            #
            random_state.seed(restart_num)
            BayesianOptimizerConfig.CONFIG_SPACE.random_state = random_state
            MultilevelQuadratic.CONFIG_SPACE.random_state = random_state
            optimizer_config = BayesianOptimizerConfig.CONFIG_SPACE.random()
            self.logger.info(f"[Restart: {restart_num}/{num_restarts}] Creating a BayesianOptimimizer with the following config: ")
            self.logger.info(f"Optimizer config: {optimizer_config.to_json(indent=2)}")
            bayesian_optimizer = BayesianOptimizer(
                optimization_problem=optimization_problem,
                optimizer_config=optimizer_config,
                logger=self.logger
            )
            # Run just past the random-design phase so guided suggestions are exercised.
            num_guided_samples = optimizer_config.min_samples_required_for_guided_design_of_experiments + 50
            for i in range(num_guided_samples):
                suggested_params = bayesian_optimizer.suggest()
                y = MultilevelQuadratic.evaluate(suggested_params)
                self.logger.info(f"[Restart: {restart_num}/{num_restarts}][Sample: {i}/{num_guided_samples}] {suggested_params}, y: {y}")

                # BUG FIX: the original iterated `suggested_params` directly, which
                # does not yield (name, value) pairs. Convert the suggested Point to
                # a dict first, matching the pattern used by the sibling tests.
                input_values_df = pd.DataFrame({
                    param_name: [param_value]
                    for param_name, param_value in suggested_params.to_dict().items()
                })
                target_values_df = pd.DataFrame({'y': [y]})
                bayesian_optimizer.register(input_values_df, target_values_df)
            self.logger.info(f"[Restart: {restart_num}/{num_restarts}] Optimum: {bayesian_optimizer.optimum()}")
        except Exception as e:
            has_failed = True
            # BUG FIX: ensure the report directory exists — the original opened
            # ./temp/test_errors.txt for append and crashed if ./temp was missing,
            # masking the real failure.
            error_dir = os.path.join(os.getcwd(), "temp")
            os.makedirs(error_dir, exist_ok=True)
            error_file_path = os.path.join(error_dir, "test_errors.txt")
            with open(error_file_path, 'a') as out_file:
                out_file.write("##################################################################################\n")
                out_file.write(f"{restart_num} failed.\n")
                out_file.write(f"Exception: {e}")
    self.assertFalse(has_failed)
def test_bayesian_optimizer_on_simple_2d_quadratic_function_pre_heated(self):
    """Tests the bayesian optimizer on a simple quadratic function first feeding the optimizer a lot of data.

    :return:
    """
    input_space = SimpleHypergrid(
        name="input",
        dimensions=[
            ContinuousDimension(name='x_1', min=-100, max=100),
            ContinuousDimension(name='x_2', min=-100, max=100),
        ]
    )
    output_space = SimpleHypergrid(
        name="output",
        dimensions=[ContinuousDimension(name='y', min=-math.inf, max=math.inf)]
    )

    # Pre-heat data: evaluate the quadratic on a 101x101 grid over the input space.
    x_1, x_2 = np.meshgrid(
        input_space['x_1'].linspace(num=101),
        input_space['x_2'].linspace(num=101)
    )
    y = quadratic(x_1=x_1, x_2=x_2)
    preheat_inputs_df = pd.DataFrame({'x_1': x_1.reshape(-1), 'x_2': x_2.reshape(-1)})
    preheat_outputs_df = pd.DataFrame({'y': y.reshape(-1)})

    optimization_problem = OptimizationProblem(
        parameter_space=input_space,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)]
    )
    bayesian_optimizer = BayesianOptimizer(
        optimization_problem=optimization_problem,
        optimizer_config=BayesianOptimizerConfig.DEFAULT,
        logger=self.logger
    )
    bayesian_optimizer.register(preheat_inputs_df, preheat_outputs_df)

    num_guided_samples = 2
    for _ in range(num_guided_samples):
        # Suggest the parameters
        suggested_params = bayesian_optimizer.suggest()
        params_as_dict = suggested_params.to_dict()

        # Reformat them to feed the parameters to the target
        target_value = quadratic(**params_as_dict)
        print(suggested_params, target_value)

        # Reformat the observation to feed it back to the optimizer
        features_df = pd.DataFrame({name: [value] for name, value in params_as_dict.items()})
        objectives_df = pd.DataFrame({'y': [target_value]})

        # Register the observation with the optimizer
        bayesian_optimizer.register(features_df, objectives_df)

    print(bayesian_optimizer.optimum())
def test_bayesian_optimizer_on_simple_2d_quadratic_function_cold_start(self):
    """Tests the bayesian optimizer on a simple quadratic function with no prior data.

    :return:
    """
    input_space = SimpleHypergrid(
        name="input",
        dimensions=[
            ContinuousDimension(name='x_1', min=-10, max=10),
            ContinuousDimension(name='x_2', min=-10, max=10)
        ]
    )
    output_space = SimpleHypergrid(
        name="output",
        dimensions=[
            ContinuousDimension(name='y', min=-math.inf, max=math.inf)
        ]
    )
    optimization_problem = OptimizationProblem(
        parameter_space=input_space,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)]
    )

    # Tune the default config: shorter random-design phase, a small forest,
    # and frequent refits so goodness-of-fit metrics are produced within 62 iterations.
    optimizer_config = BayesianOptimizerConfig.DEFAULT
    optimizer_config.min_samples_required_for_guided_design_of_experiments = 50
    optimizer_config.homogeneous_random_forest_regression_model_config.n_estimators = 10
    optimizer_config.homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config.splitter = "best"
    optimizer_config.homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config.n_new_samples_before_refit = 2
    print(optimizer_config.to_json(indent=2))

    bayesian_optimizer = BayesianOptimizer(
        optimization_problem=optimization_problem,
        optimizer_config=optimizer_config,
        logger=self.logger
    )

    num_iterations = 62
    for i in range(num_iterations):
        suggested_params = bayesian_optimizer.suggest()
        suggested_params_dict = suggested_params.to_dict()
        target_value = quadratic(**suggested_params_dict)
        print(f"[{i+1}/{num_iterations}] Suggested params: {suggested_params_dict}, target_value: {target_value}")

        # Register the observation as single-row dataframes.
        input_values_df = pd.DataFrame({
            param_name: [param_value]
            for param_name, param_value in suggested_params_dict.items()
        })
        target_values_df = pd.DataFrame({'y': [target_value]})
        bayesian_optimizer.register(input_values_df, target_values_df)

        # Once past the random-design phase, periodically report the optimum and
        # the surrogate model's current training goodness-of-fit.
        if i > optimizer_config.min_samples_required_for_guided_design_of_experiments and i % 10 == 1:
            print(f"[{i}/{num_iterations}] Optimum: {bayesian_optimizer.optimum()}")
            convergence_state = bayesian_optimizer.get_optimizer_convergence_state()
            random_forest_fit_state = convergence_state.surrogate_model_fit_state
            random_forest_gof_metrics = random_forest_fit_state.current_train_gof_metrics
            print(f"Relative squared error: {random_forest_gof_metrics.relative_squared_error}, Relative absolute error: {random_forest_gof_metrics.relative_absolute_error}")

    # Final goodness-of-fit checks on the random forest and each of its trees.
    convergence_state = bayesian_optimizer.get_optimizer_convergence_state()
    random_forest_fit_state = convergence_state.surrogate_model_fit_state
    random_forest_gof_metrics = random_forest_fit_state.current_train_gof_metrics

    # The forest must have refit late in the run, not just during warm-up.
    self.assertTrue(random_forest_gof_metrics.last_refit_iteration_number > 0.7 * num_iterations)

    models_gof_metrics = [random_forest_gof_metrics]
    for decision_tree_fit_state in random_forest_fit_state.decision_trees_fit_states:
        models_gof_metrics.append(decision_tree_fit_state.current_train_gof_metrics)

    for model_gof_metrics in models_gof_metrics:
        self.assertTrue(0 <= model_gof_metrics.relative_absolute_error <= 1)  # This could fail if the models are really wrong. Not expected in this unit test though.
        self.assertTrue(0 <= model_gof_metrics.relative_squared_error <= 1)

        # There is an invariant linking mean absolute error (MAE), root mean squared error (RMSE) and number of observations (n) let's assert it.
        n = model_gof_metrics.last_refit_iteration_number
        self.assertTrue(
            model_gof_metrics.mean_absolute_error
            <= model_gof_metrics.root_mean_squared_error
            <= math.sqrt(n) * model_gof_metrics.mean_absolute_error
        )

        # We know that the sample confidence interval is wider (or equal to) prediction interval. So hit rates should be ordered accordingly.
        self.assertTrue(model_gof_metrics.sample_90_ci_hit_rate >= model_gof_metrics.prediction_90_ci_hit_rate)

    # Smoke-check both the shallow and deep goodness-of-fit dataframes.
    goodness_of_fit_df = random_forest_fit_state.get_goodness_of_fit_dataframe(data_set_type=DataSetType.TRAIN)
    print(goodness_of_fit_df.head())

    goodness_of_fit_df = random_forest_fit_state.get_goodness_of_fit_dataframe(data_set_type=DataSetType.TRAIN, deep=True)
    print(goodness_of_fit_df.head())