def test_pareto_frontier_volume_simple(self):
    """Sanity-check the pareto volume estimate on a hand-built 2D frontier.

    Every frontier point lies on the line y = 1 - x inside the unit square and
    both objectives are maximized. The test requires the two-sided confidence
    interval (alpha=0.05) on the volume to fall strictly inside (0.49, 0.51).
    """
    # Build the frontier: 100 evenly spaced points that all lie on y = 1 - x.
    xs = np.linspace(start=0, stop=1, num=100)
    frontier_df = pd.DataFrame({'x': xs, 'y': 1 - xs})

    unit_square = SimpleHypergrid(
        name='objectives',
        dimensions=[
            ContinuousDimension(name='x', min=0, max=1),
            ContinuousDimension(name='y', min=0, max=1),
        ],
    )
    maximize_both = [
        Objective(name='x', minimize=False),
        Objective(name='y', minimize=False),
    ]
    problem = OptimizationProblem(
        parameter_space=None,
        objective_space=unit_square,
        objectives=maximize_both,
    )

    frontier = ParetoFrontier(problem, frontier_df)
    volume_estimator = frontier.approximate_pareto_volume(num_samples=1000000)
    lower_bound, upper_bound = volume_estimator.get_two_sided_confidence_interval_on_pareto_volume(alpha=0.05)
    print(lower_bound, upper_bound)
    assert 0.49 < lower_bound < upper_bound < 0.51
def test_basic_functionality_on_2d_objective_space(self):
    """Check the pareto invariants on random points in a 2D objective space.

    Random objective rows are generated, a frontier is built from them, and
    for every pareto row the test verifies that (a) each non-pareto row is
    smaller in at least one objective and (b) no other pareto row is larger
    in all objectives.
    """
    params_space = SimpleHypergrid(
        name='params',
        dimensions=[ContinuousDimension(name='x1', min=0, max=10)],
    )
    objectives_space = SimpleHypergrid(
        name='objectives',
        dimensions=[
            ContinuousDimension(name='y1', min=0, max=10),
            ContinuousDimension(name='y2', min=0, max=10),
        ],
    )
    problem = OptimizationProblem(
        parameter_space=params_space,
        objective_space=objectives_space,
        objectives=[
            Objective(name='y1', minimize=False),
            Objective(name='y2', minimize=False),
        ],
    )

    num_rows = 100000
    objectives_df = objectives_space.random_dataframe(num_rows)

    # The parameters are unrelated to the objectives; this test does not need them to match.
    params_df = params_space.random_dataframe(num_rows)

    frontier = ParetoFrontier(
        optimization_problem=problem,
        objectives_df=objectives_df,
        parameters_df=params_df,
    )
    pareto_df = frontier.pareto_df
    non_pareto_index = objectives_df.index.difference(pareto_df.index)
    num_non_pareto = len(non_pareto_index)

    for label, pareto_row in pareto_df.iterrows():
        # No pareto point may be dominated by a non-pareto point: every
        # non-pareto row has to be smaller in at least one objective.
        smaller_somewhere = (objectives_df.loc[non_pareto_index] < pareto_row).any(axis=1)
        assert smaller_somewhere.sum() == num_non_pareto

        # No pareto point may be dominated by another pareto point either.
        remaining_labels = pareto_df.index.difference([label])
        larger_everywhere = (pareto_df.loc[remaining_labels] > pareto_row).all(axis=1)
        assert larger_everywhere.sum() == 0
def test_repeated_values(self):
    """Check that the pareto computation copes with repeated objective values.

    A hand-written set of dominated rows is concatenated with the rows that
    are expected to form the frontier (including duplicates); the computed
    frontier must equal the expected one after sorting by both objectives.
    """
    column_names = ['y1', 'y2']

    problem = OptimizationProblem(
        parameter_space=None,
        objective_space=SimpleHypergrid(
            name="objectives",
            dimensions=[
                ContinuousDimension(name='y1', min=0, max=5),
                ContinuousDimension(name='y2', min=0, max=5),
            ],
        ),
        objectives=[
            Objective(name='y1', minimize=False),
            Objective(name='y2', minimize=False),
        ],
    )

    expected_rows = [
        [1, 2],
        [1, 2],
        [2, 1],
        [0.5, 2],
        [1, 1],
        [2, 0.5],
    ]
    expected_pareto_df = pd.DataFrame(expected_rows, columns=column_names)

    dominated_rows = [
        [0.5, 0.5],
        [0.5, 1],
        [0.5, 1.5],
        [1, 0.5],
        [1.5, 0.5],
    ]
    dominated_df = pd.DataFrame(dominated_rows, columns=column_names)

    all_objectives_df = pd.concat([dominated_df, expected_pareto_df])
    frontier = ParetoFrontier(
        problem,
        objectives_df=all_objectives_df,
        parameters_df=pd.DataFrame(index=all_objectives_df.index),
    )

    computed_sorted = frontier.pareto_df.sort_values(by=['y1', 'y2'])
    expected_sorted = expected_pareto_df.sort_values(by=['y1', 'y2'])
    assert computed_sorted.equals(expected_sorted)
def test_optimization_problem_none_context(self):
    """Round-trip a context-free optimization problem through the service encoder/decoder.

    After encoding and decoding, the context space must still be None, and
    random points drawn from each original space must be contained in the
    decoded counterpart and vice versa.
    """
    parameter_space = SimpleHypergrid(
        name="test",
        dimensions=[
            ContinuousDimension(name="x", min=0, max=1),
            OrdinalDimension(name="y", ordered_values=[1, 2, 3, 5, 10]),
            CategoricalDimension(name="y2", values=[True, False]),
        ],
    )
    objective_space = SimpleHypergrid(
        name="z",
        dimensions=[
            ContinuousDimension(name="z\n special", min=-50, max=-49),
            ContinuousDimension(name="z1", min=-1, max=1),
        ],
    )
    optimization_problem = OptimizationProblem(
        parameter_space=parameter_space,
        objective_space=objective_space,
        objectives=[
            Objective(name="z\n special", minimize=True),
            Objective(name="z1", minimize=False),
        ],
    )

    encoded_problem = OptimizerServiceEncoder.encode_optimization_problem(optimization_problem)
    decoded_problem = OptimizerServiceDecoder.decode_optimization_problem(encoded_problem)

    print(f"Context space is: {decoded_problem.context_space}")
    assert decoded_problem.context_space is None

    num_checks = 1000

    # Parameter space: containment must hold in both directions.
    for _ in range(num_checks):
        assert decoded_problem.parameter_space.random() in parameter_space
        assert parameter_space.random() in decoded_problem.parameter_space

    # Output (objective) space.
    for _ in range(num_checks):
        assert decoded_problem.objective_space.random() in objective_space
        assert objective_space.random() in decoded_problem.objective_space

    # Feature space.
    for _ in range(num_checks):
        assert decoded_problem.feature_space.random() in optimization_problem.feature_space
        assert optimization_problem.feature_space.random() in decoded_problem.feature_space
def test_optimum_before_register_error(self):
    """Asking for the optimum before any observation is registered must raise ValueError.

    After a single observation has been registered the same call is made
    again, this time outside the ``pytest.raises`` block.
    """
    x_dimension = ContinuousDimension(name='x', min=-10, max=10)
    y_dimension = ContinuousDimension(name='y', min=-math.inf, max=math.inf)

    input_space = SimpleHypergrid(name="input", dimensions=[x_dimension])
    output_space = SimpleHypergrid(name="output", dimensions=[y_dimension])

    problem = OptimizationProblem(
        parameter_space=input_space,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)],
    )
    optimizer = self.bayesian_optimizer_factory.create_local_optimizer(
        optimization_problem=problem,
        optimizer_config=bayesian_optimizer_config_store.default,
    )

    # Nothing registered yet.
    with pytest.raises(ValueError):
        optimizer.optimum()

    single_param_df = pd.DataFrame({'x': [0.0]})
    single_target_df = pd.DataFrame({'y': [1.0]})
    optimizer.register(
        parameter_values_pandas_frame=single_param_df,
        target_values_pandas_frame=single_target_df,
    )
    optimizer.optimum()
def setUp(self):
    """Start the optimizer gRPC service on port 50051 and define a 2D minimization problem."""
    self.logger = create_logger(self.__class__.__name__)

    # The service has to be running before clients connect to it.
    self.server = OptimizerMicroserviceServer(port=50051, num_threads=10)
    self.server.start()

    channel = grpc.insecure_channel('localhost:50051')
    self.optimizer_service_channel = channel
    self.bayesian_optimizer_factory = BayesianOptimizerFactory(grpc_channel=channel, logger=self.logger)
    self.optimizer_monitor = OptimizerMonitor(grpc_channel=channel, logger=self.logger)

    # The optimization problem: minimize y over (x_1, x_2) in [-100, 100]^2.
    input_dimensions = [
        ContinuousDimension(name='x_1', min=-100, max=100),
        ContinuousDimension(name='x_2', min=-100, max=100),
    ]
    output_dimensions = [
        ContinuousDimension(name='y', min=-math.inf, max=math.inf),
    ]
    input_space = SimpleHypergrid(name="input", dimensions=input_dimensions)
    output_space = SimpleHypergrid(name="output", dimensions=output_dimensions)

    self.optimization_problem = OptimizationProblem(
        parameter_space=input_space,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)],
    )
def test_hierarchical_quadratic_cold_start(self):
    """Run local and remote optimizers from scratch on the 'three_level_quadratic' function.

    For each restart a local and a remote optimizer are created with a
    reduced-size model configuration; each is fed 50 suggest/evaluate/register
    rounds and then its optima are validated.
    """
    function_config = objective_function_config_store.get_config_by_name('three_level_quadratic')
    objective_function = ObjectiveFunctionFactory.create_objective_function(
        objective_function_config=function_config,
    )

    y_dimension = ContinuousDimension(name='y', min=-math.inf, max=math.inf)
    output_space = SimpleHypergrid(name="output", dimensions=[y_dimension])

    optimization_problem = OptimizationProblem(
        parameter_space=objective_function.parameter_space,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)],
    )

    num_restarts = 2
    for restart_num in range(num_restarts):
        # Shrink the default configuration so that the test runs quickly.
        optimizer_config = bayesian_optimizer_config_store.default
        optimizer_config.min_samples_required_for_guided_design_of_experiments = 20
        optimizer_config.homogeneous_random_forest_regression_model_config.n_estimators = 10
        optimizer_config.homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config.splitter = "best"
        optimizer_config.homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config.min_samples_to_fit = 10
        optimizer_config.homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config.n_new_samples_before_refit = 2

        local_optimizer = self.bayesian_optimizer_factory.create_local_optimizer(
            optimization_problem=optimization_problem,
            optimizer_config=optimizer_config,
        )
        remote_optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
            optimization_problem=optimization_problem,
            optimizer_config=optimizer_config,
        )

        for bayesian_optimizer in (local_optimizer, remote_optimizer):
            num_guided_samples = 50
            for i in range(num_guided_samples):
                suggested_params = bayesian_optimizer.suggest()
                y = objective_function.evaluate_point(suggested_params)
                print(f"[{i}/{num_guided_samples}] {suggested_params}, y: {y}")

                # One-row frame: each suggested parameter becomes a column.
                single_row = {}
                for param_name, param_value in suggested_params:
                    single_row[param_name] = [param_value]
                input_values_df = pd.DataFrame(single_row)
                target_values_df = y.to_dataframe()

                bayesian_optimizer.register(
                    feature_values_pandas_frame=input_values_df,
                    target_values_pandas_frame=target_values_df,
                )

            best_config_point, best_objective = bayesian_optimizer.optimum(
                optimum_definition=OptimumDefinition.BEST_OBSERVATION,
            )
            print(f"[Restart: {restart_num}/{num_restarts}] Optimum config: {best_config_point}, optimum objective: {best_objective}")
            self.validate_optima(optimizer=bayesian_optimizer)
def test_construct_feature_dataframe_no_context(self):
    """Verify the feature dataframe built for a problem without a context space.

    The frame is expected to have one column per parameter dimension,
    prefixed with ``three_level_quadratic_config.``, followed by a final
    ``contains_context`` column that is False in every row.
    """
    objective_function_config = objective_function_config_store.get_config_by_name('three_level_quadratic')
    objective_function = ObjectiveFunctionFactory.create_objective_function(
        objective_function_config=objective_function_config,
    )

    # The problem uses the objective function's own output space; the
    # separately constructed (and never used) output space was removed.
    optimization_problem = OptimizationProblem(
        parameter_space=objective_function.parameter_space,
        objective_space=objective_function.output_space,
        objectives=[Objective(name='y', minimize=True)],
    )

    n_samples = 100
    parameter_df = optimization_problem.parameter_space.random_dataframe(n_samples)
    feature_df = optimization_problem.construct_feature_dataframe(parameters_df=parameter_df)

    # One column per parameter dimension plus the trailing 'contains_context' flag.
    num_parameter_dimensions = len(optimization_problem.parameter_space.dimension_names)
    assert feature_df.shape == (n_samples, num_parameter_dimensions + 1)

    expected_columns = sorted([
        f"three_level_quadratic_config.{n}"
        for n in optimization_problem.parameter_space.dimension_names
    ])
    assert (feature_df.columns[:-1].sort_values() == expected_columns).all()
    assert feature_df.columns[-1] == "contains_context"
    assert not feature_df.contains_context.any()
def default_optimization_problem(self):
    """Return the stored default problem, or build one when none is stored.

    When ``self._default_optimization_problem`` is None, a new problem is
    constructed on each call from ``self.parameter_space`` and
    ``self.output_space``, with one minimizing objective per output dimension.
    """
    stored_problem = self._default_optimization_problem
    if stored_problem is not None:
        return stored_problem

    return OptimizationProblem(
        parameter_space=self.parameter_space,
        objective_space=self.output_space,
        objectives=[
            Objective(name=dim_name, minimize=True)
            for dim_name in self.output_space.dimension_names
        ],
    )
def setUp(self):
    """Prepare the smart-cache experiment that is driven by a remote optimizer.

    Initializes the MLOS global context, starts the optimizer gRPC service and
    the MLOS agent thread, configures the workload generator, and defines the
    problem of maximizing ``hit_rate`` over the SmartCache search space.
    """
    mlos_globals.init_mlos_global_context()
    global_context = mlos_globals.mlos_global_context
    global_context.start_clock()

    self.logger = create_logger('TestSmartCacheWithRemoteOptimizer')
    self.logger.level = logging.DEBUG

    # Bring up the gRPC service before opening a channel to it.
    self.server = OptimizerMicroserviceServer(port=50051, num_threads=10)
    self.server.start()

    grpc_channel = grpc.insecure_channel('localhost:50051')
    self.optimizer_service_grpc_channel = grpc_channel
    self.bayesian_optimizer_factory = BayesianOptimizerFactory(grpc_channel=grpc_channel, logger=self.logger)

    self.mlos_agent = MlosAgent(
        logger=self.logger,
        communication_channel=mlos_globals.mlos_global_context.communication_channel,
        shared_config=mlos_globals.mlos_global_context.shared_config,
        bayesian_optimizer_grpc_channel=grpc_channel,
    )
    self.mlos_agent_thread = Thread(target=self.mlos_agent.run)
    self.mlos_agent_thread.start()

    global_values.declare_singletons()  # TODO: having both globals and global_values is a problem

    # Register the component types the agent is allowed to manage.
    for component_type in (SmartCache, SmartCacheWorkloadGenerator):
        self.mlos_agent.add_allowed_component_type(component_type)

    workload_config = Point(
        workload_type='cyclical_key_from_range',
        cyclical_key_from_range_config=Point(min=0, range_width=2048),
    )
    self.mlos_agent.set_configuration(
        component_type=SmartCacheWorkloadGenerator,
        new_config_values=workload_config,
    )

    # Create the workload.
    self.smart_cache_workload = SmartCacheWorkloadGenerator(logger=self.logger)

    self.optimizer = None
    self.working_set_size_estimator = WorkingSetSizeEstimator()
    self.hit_rate_monitor = HitRateMonitor()

    self.smart_cache_experiment = MlosExperiment(
        smart_component_types=[SmartCache],
        telemetry_aggregators=[self.working_set_size_estimator, self.hit_rate_monitor],
    )

    hit_rate_space = SimpleHypergrid(
        name="objectives",
        dimensions=[ContinuousDimension(name="hit_rate", min=0, max=1)],
    )
    self.optimization_problem = OptimizationProblem(
        parameter_space=SmartCache.parameter_search_space,
        objective_space=hit_rate_space,
        objectives=[Objective(name="hit_rate", minimize=False)],
    )
def test_bayesian_optimizer_on_simple_2d_quadratic_function_cold_start(self):
    """Run the bayesian optimizer on a 2D quadratic without any prior data.

    Performs 1000 suggest/evaluate/register rounds and logs the current
    optimum every 20 iterations after the first 20, then once more at the end.
    """
    input_space = SimpleHypergrid(
        name="input",
        dimensions=[
            ContinuousDimension(name='x_1', min=-100, max=100),
            ContinuousDimension(name='x_2', min=-100, max=100),
        ],
    )
    output_space = SimpleHypergrid(
        name="output",
        dimensions=[ContinuousDimension(name='y', min=-math.inf, max=math.inf)],
    )
    optimization_problem = OptimizationProblem(
        parameter_space=input_space,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)],
    )
    bayesian_optimizer = BayesianOptimizer(
        optimization_problem=optimization_problem,
        optimizer_config=BayesianOptimizerConfig.DEFAULT,
        logger=self.logger,
    )

    num_guided_samples = 1000
    for i in range(num_guided_samples):
        suggested_params = bayesian_optimizer.suggest()
        params_as_dict = suggested_params.to_dict()
        target_value = quadratic(**params_as_dict)
        self.logger.info(f"[{i}/{num_guided_samples}] suggested params: {suggested_params}, target: {target_value}")

        # One-row frames for the suggested parameters and the observed target.
        single_row = {}
        for param_name, param_value in params_as_dict.items():
            single_row[param_name] = [param_value]
        input_values_df = pd.DataFrame(single_row)
        target_values_df = pd.DataFrame({'y': [target_value]})

        bayesian_optimizer.register(input_values_df, target_values_df)

        if i > 20 and i % 20 == 0:
            self.logger.info(f"[{i}/{num_guided_samples}] Optimum: {bayesian_optimizer.optimum()}")

    self.logger.info(f"Optimum: {bayesian_optimizer.optimum()}")
def setup_class(cls):
    """Build the shared fixtures needed to test the RandomSearchOptimizer.

    The optimizer needs an optimization problem and a utility function. The
    utility function in turn needs a fitted surrogate model, so this trains a
    multi-objective homogeneous random forest on 2500 random samples of the
    '2d_quadratic_concave_up' objective function.
    """
    global_values.declare_singletons()
    global_values.tracer = Tracer(actor_id=cls.__name__, thread_id=0)

    function_config = objective_function_config_store.get_config_by_name('2d_quadratic_concave_up')
    objective_function = ObjectiveFunctionFactory.create_objective_function(
        objective_function_config=function_config,
    )

    cls.input_space = objective_function.parameter_space
    cls.output_space = objective_function.output_space

    # Training data for the surrogate model.
    cls.input_values_dataframe = objective_function.parameter_space.random_dataframe(num_samples=2500)
    cls.output_values_dataframe = objective_function.evaluate_dataframe(cls.input_values_dataframe)

    cls.model_config = homogeneous_random_forest_config_store.default
    print(cls.model_config)

    cls.model = MultiObjectiveHomogeneousRandomForest(
        model_config=cls.model_config,
        input_space=cls.input_space,
        output_space=cls.output_space,
    )
    num_training_rows = len(cls.input_values_dataframe.index)
    cls.model.fit(
        cls.input_values_dataframe,
        cls.output_values_dataframe,
        iteration_number=num_training_rows,
    )

    cls.utility_function_config = Point(
        utility_function_name="upper_confidence_bound_on_improvement",
        alpha=0.05,
    )

    cls.optimization_problem = OptimizationProblem(
        parameter_space=cls.input_space,
        objective_space=cls.output_space,
        objectives=[Objective(name='y', minimize=True)],
    )

    first_objective = cls.optimization_problem.objectives[0]
    cls.utility_function = ConfidenceBoundUtilityFunction(
        function_config=cls.utility_function_config,
        surrogate_model=cls.model,
        minimize=first_objective.minimize,
    )
def decode_optimization_problem(optimization_problem_pb2: OptimizationProblem_pb2) -> OptimizationProblem:
    """Rebuild an OptimizationProblem from its protobuf message.

    The parameter and objective spaces are parsed from their hypergrid JSON
    strings. The context space is parsed the same way when its JSON string is
    non-empty and is None otherwise.
    """
    parameter_space = json.loads(
        optimization_problem_pb2.ParameterSpace.HypergridJsonString,
        cls=HypergridJsonDecoder,
    )
    objective_space = json.loads(
        optimization_problem_pb2.ObjectiveSpace.HypergridJsonString,
        cls=HypergridJsonDecoder,
    )
    objectives = [
        Objective(name=objective_pb2.Name, minimize=objective_pb2.Minimize)
        for objective_pb2 in optimization_problem_pb2.Objectives
    ]

    context_json = optimization_problem_pb2.ContextSpace.HypergridJsonString
    if context_json:
        context_space = json.loads(context_json, cls=HypergridJsonDecoder)
    else:
        context_space = None

    return OptimizationProblem(
        parameter_space=parameter_space,
        objective_space=objective_space,
        objectives=objectives,
        context_space=context_space,
    )
def setUp(self):
    """Prepare the smart-cache experiment with a timer-driven configuration callback.

    Initializes the MLOS global context, starts the MLOS agent thread, creates
    the workload generator and telemetry aggregators, and defines the problem
    of minimizing ``miss_rate`` over the SmartCache search space.
    """
    mlos_globals.init_mlos_global_context()
    global_context = mlos_globals.mlos_global_context
    global_context.start_clock()

    self.logger = create_logger('TestSmartCacheWithRemoteOptimizer')
    self.logger.level = logging.INFO

    self.mlos_agent = MlosAgent(
        logger=self.logger,
        communication_channel=mlos_globals.mlos_global_context.communication_channel,
        shared_config=mlos_globals.mlos_global_context.shared_config,
    )
    self.mlos_agent_thread = Thread(target=self.mlos_agent.run)
    self.mlos_agent_thread.start()

    global_values.declare_singletons()  # TODO: having both globals and global_values is a problem

    self.workload_duration_s = 5

    # Register the component types the agent is allowed to manage.
    for component_type in (SmartCache, SmartCacheWorkloadGenerator):
        self.mlos_agent.add_allowed_component_type(component_type)

    # Create the workload.
    self.smart_cache_workload = SmartCacheWorkloadGenerator(logger=self.logger)

    self.optimizer = None
    self.working_set_size_estimator = WorkingSetSizeEstimator()
    self.cache_config_timer = Timer(
        timeout_ms=200,
        observer_callback=self._set_new_cache_configuration,
    )

    self.smart_cache_experiment = MlosExperiment(
        smart_component_types=[SmartCache],
        telemetry_aggregators=[self.cache_config_timer, self.working_set_size_estimator],
    )

    miss_rate_space = SimpleHypergrid(
        name="objectives",
        dimensions=[ContinuousDimension(name="miss_rate", min=0, max=1)],
    )
    self.optimization_problem = OptimizationProblem(
        parameter_space=SmartCache.parameter_search_space,
        objective_space=miss_rate_space,
        context_space=None,  # TODO: add working set size estimate
        objectives=[Objective(name="miss_rate", minimize=True)],
    )
def test_pareto_shape(self, function_config_name):
    """Check that the pareto frontier has the shape expected for each phase difference.

    With no phase difference the frontier should be a single point. With a
    phase difference of pi / 2 it should lie on a quarter circle. With a phase
    difference of pi it should lie on a diagonal.
    """
    function_config = multi_objective_enveloped_waves_config_store.get_config_by_name(function_config_name)
    objective_function = MultiObjectiveEnvelopedWaves(function_config)

    maximize_all = [
        Objective(name=dim_name, minimize=False)
        for dim_name in objective_function.output_space.dimension_names
    ]
    optimization_problem = OptimizationProblem(
        parameter_space=objective_function.parameter_space,
        objective_space=objective_function.output_space,
        objectives=maximize_all,
    )

    # Build a meshgrid over all parameters.
    # TODO: add this as a function in Hypergrids
    if function_config_name != "pi_phase_difference":
        num_points = 100
    else:
        num_points = 10

    linspaces = [
        dimension.linspace(num_points)
        for dimension in objective_function.parameter_space.dimensions
    ]
    flat_meshgrids = [grid.flatten() for grid in np.meshgrid(*linspaces)]
    params_df = pd.DataFrame(
        dict(zip(objective_function.parameter_space.dimension_names, flat_meshgrids))
    )
    objectives_df = objective_function.evaluate_dataframe(params_df)

    pareto_frontier = ParetoFrontier(
        optimization_problem=optimization_problem,
        objectives_df=objectives_df,
        parameters_df=params_df,
    )
    pareto_df = pareto_frontier.pareto_df

    if function_config_name == "no_phase_difference":
        # A single optimum is expected, with every objective close to 3 and
        # every selected parameter close to half of pi.
        assert len(pareto_df.index) == 1
        for objective in optimization_problem.objectives:
            assert abs(pareto_df[objective.name].iloc[0] - 3) < 0.001

        optimal_params_df = params_df.iloc[pareto_df.index]
        for param_name in objective_function.parameter_space.dimension_names:
            assert abs(optimal_params_df[param_name].iloc[0] - math.pi / 2) < 0.02

    elif function_config_name == "half_pi_phase_difference":
        # Every pareto point should sit at distance 3 from the origin.
        expected_radius = 3
        pareto_df['radius'] = np.sqrt(pareto_df['y0'] ** 2 + pareto_df['y1'] ** 2)
        pareto_df['error'] = pareto_df['radius'] - expected_radius
        assert (np.abs(pareto_df['error']) < 0.01).all()

    elif function_config_name == "pi_phase_difference":
        # The two objectives are expected to nearly cancel each other out.
        assert (np.abs(pareto_df['y0'] + pareto_df['y1']) < 0.01).all()
def test_bayesian_optimizer_on_simple_2d_quadratic_function_pre_heated(self):
    """Run the bayesian optimizer on a 2D quadratic after feeding it a lot of data.

    10000 random observations are registered up front, followed by 20 guided
    suggest/evaluate/register rounds. The optima are then validated and the
    collected trace is written to ``PreHeatedTrace.json`` in ``self.temp_dir``.
    """
    function_config = objective_function_config_store.get_config_by_name('2d_quadratic_concave_up')
    objective_function = ObjectiveFunctionFactory.create_objective_function(function_config)

    # Pre-heating data: random parameters and their evaluated targets.
    random_params_df = objective_function.parameter_space.random_dataframe(num_samples=10000)
    y_df = objective_function.evaluate_dataframe(random_params_df)

    optimization_problem = OptimizationProblem(
        parameter_space=objective_function.parameter_space,
        objective_space=objective_function.output_space,
        objectives=[Objective(name='y', minimize=True)],
    )
    bayesian_optimizer = BayesianOptimizer(
        optimization_problem=optimization_problem,
        optimizer_config=bayesian_optimizer_config_store.default,
        logger=self.logger,
    )
    bayesian_optimizer.register(random_params_df, y_df)

    num_guided_samples = 20
    for i in range(num_guided_samples):
        # Ask for a suggestion and evaluate it.
        suggested_params = bayesian_optimizer.suggest()
        target_value = objective_function.evaluate_point(suggested_params)
        self.logger.info(f"[{i}/{num_guided_samples}] suggested params: {suggested_params}, target: {target_value}")

        # Feed the observation back to the optimizer.
        params_frame = suggested_params.to_dataframe()
        target_frame = target_value.to_dataframe()
        bayesian_optimizer.register(params_frame, target_frame)

    self.validate_optima(bayesian_optimizer)
    best_config_point, best_objective = bayesian_optimizer.optimum()
    self.logger.info(f"Optimum: {best_objective} Best Configuration: {best_config_point}")

    trace_output_path = os.path.join(self.temp_dir, "PreHeatedTrace.json")
    self.logger.info(f"Writing trace to {trace_output_path}")
    tracer = global_values.tracer
    tracer.dump_trace_to_file(output_file_path=trace_output_path)
    global_values.tracer.clear_events()
def test_glow_worm_on_three_level_quadratic(self):
    """Check that the glow worm swarm optimizer suggests points inside the parameter space.

    A random forest surrogate is warmed up on 1000 random samples of the
    'three_level_quadratic' function, wrapped in a confidence-bound utility
    function, and the optimizer is asked for 5 suggestions.
    """
    y_dimension = ContinuousDimension(name='y', min=-math.inf, max=math.inf)
    output_space = SimpleHypergrid(name="output", dimensions=[y_dimension])

    function_config = objective_function_config_store.get_config_by_name('three_level_quadratic')
    objective_function = ObjectiveFunctionFactory.create_objective_function(
        objective_function_config=function_config,
    )

    # Warm up the surrogate model a bit.
    num_warmup_samples = 1000
    random_params_df = objective_function.parameter_space.random_dataframe(num_samples=num_warmup_samples)
    y = objective_function.evaluate_dataframe(random_params_df)

    model = HomogeneousRandomForestRegressionModel(
        model_config=self.model_config,
        input_space=objective_function.parameter_space,
        output_space=output_space,
    )
    model.fit(
        feature_values_pandas_frame=random_params_df,
        target_values_pandas_frame=y,
        iteration_number=num_warmup_samples,
    )

    optimization_problem = OptimizationProblem(
        parameter_space=objective_function.parameter_space,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)],
    )
    first_objective = optimization_problem.objectives[0]
    utility_function = ConfidenceBoundUtilityFunction(
        function_config=self.utility_function_config,
        surrogate_model=model,
        minimize=first_objective.minimize,
    )
    glow_worm_swarm_optimizer = GlowWormSwarmOptimizer(
        optimization_problem=optimization_problem,
        utility_function=utility_function,
        optimizer_config=glow_worm_swarm_optimizer_config_store.default,
    )

    num_iterations = 5
    for i in range(num_iterations):
        suggested_params = glow_worm_swarm_optimizer.suggest()
        print(f"[{i+1}/{num_iterations}] {suggested_params.to_json()}")
        self.assertTrue(suggested_params in objective_function.parameter_space)
def decode_optimization_problem(
        optimization_problem_pb2: OptimizerService_pb2.OptimizationProblem
) -> OptimizationProblem:
    """Rebuild an OptimizationProblem from its protobuf message.

    The parameter and objective spaces are always decoded. The context space
    is decoded only when the message has its "ContextSpace" field set, and is
    None otherwise.
    """
    parameter_space = OptimizerServiceDecoder.decode_hypergrid(optimization_problem_pb2.ParameterSpace)
    objective_space = OptimizerServiceDecoder.decode_hypergrid(optimization_problem_pb2.ObjectiveSpace)
    objectives = [
        Objective(name=objective_pb2.Name, minimize=objective_pb2.Minimize)
        for objective_pb2 in optimization_problem_pb2.Objectives
    ]

    if optimization_problem_pb2.HasField("ContextSpace"):
        context_space = OptimizerServiceDecoder.decode_hypergrid(optimization_problem_pb2.ContextSpace)
    else:
        context_space = None

    return OptimizationProblem(
        parameter_space=parameter_space,
        objective_space=objective_space,
        objectives=objectives,
        context_space=context_space,
    )
def test_bayesian_optimizer_on_simple_2d_quadratic_function_cold_start(self):
    """Run the bayesian optimizer on a 2D quadratic without any prior data.

    Performs 1000 suggest/evaluate/register rounds, logging the current
    optimum every 20 iterations after the first 20. Afterwards the optima are
    validated and the reported optimum must lie in the objective function's
    spaces and equal the smallest registered target.
    """
    objective_function_config = objective_function_config_store.get_config_by_name('2d_quadratic_concave_up')
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config)

    optimization_problem = OptimizationProblem(
        parameter_space=objective_function.parameter_space,
        objective_space=objective_function.output_space,
        objectives=[Objective(name='y', minimize=True)],
    )
    bayesian_optimizer = BayesianOptimizer(
        optimization_problem=optimization_problem,
        optimizer_config=bayesian_optimizer_config_store.default,
        logger=self.logger,
    )

    num_guided_samples = 1000
    for i in range(num_guided_samples):
        suggested_params = bayesian_optimizer.suggest()
        target_value = objective_function.evaluate_point(suggested_params)
        self.logger.info(f"[{i}/{num_guided_samples}] suggested params: {suggested_params}, target: {target_value}")

        bayesian_optimizer.register(suggested_params.to_dataframe(), target_value.to_dataframe())

        if i > 20 and i % 20 == 0:
            best_config_point, best_objective = bayesian_optimizer.optimum()
            self.logger.info(f"[{i}/{num_guided_samples}] Optimum config: {best_config_point}, optimum objective: {best_objective}")

    self.validate_optima(bayesian_optimizer)
    best_config, optimum = bayesian_optimizer.optimum()
    assert objective_function.parameter_space.contains_point(best_config)
    assert objective_function.output_space.contains_point(optimum)

    _, all_targets = bayesian_optimizer.get_all_observations()
    # The result of .min() is indexed by column label, so an integer key is
    # not a label here. Use .iloc for explicit positional access instead of
    # the deprecated positional fallback of [].
    assert optimum.y == all_targets.min().iloc[0]
    self.logger.info(f"Optimum: {optimum} best configuration: {best_config}")
def setUp(self):
    """Start the optimizer gRPC service on port 50051 and set up the 2D quadratic problem."""
    self.logger = create_logger(self.__class__.__name__)

    # The service has to be running before clients connect to it.
    self.server = OptimizerMicroserviceServer(port=50051, num_threads=10)
    self.server.start()

    channel = grpc.insecure_channel('localhost:50051')
    self.optimizer_service_channel = channel
    self.bayesian_optimizer_factory = BayesianOptimizerFactory(grpc_channel=channel, logger=self.logger)
    self.optimizer_monitor = OptimizerMonitor(grpc_channel=channel, logger=self.logger)

    function_config = objective_function_config_store.get_config_by_name('2d_quadratic_concave_up')
    self.objective_function = ObjectiveFunctionFactory.create_objective_function(function_config)

    self.optimization_problem = OptimizationProblem(
        parameter_space=self.objective_function.parameter_space,
        objective_space=self.objective_function.output_space,
        objectives=[Objective(name='y', minimize=True)],
    )
def test_construct_feature_dataframe_context(self):
    """Verify the feature dataframe built for a problem that has a context space.

    Omitting the context must raise a ValueError matching "Context required".
    With a context, the frame must hold the prefixed parameter column, the
    ``contains_context`` flag (True in every row) and the prefixed context
    column, in that order.
    """
    # Illustrative objective for this setup; it is not called in this test.
    def f(parameters, context):
        return pd.DataFrame({
            'function_value': -np.exp(-50 * (parameters.x - 0.5 * context.y - 0.5)**2)
        })

    input_space = SimpleHypergrid(
        name="my_input_name",
        dimensions=[ContinuousDimension(name="x", min=0, max=1)],
    )
    output_space = SimpleHypergrid(
        name="objective",
        dimensions=[ContinuousDimension(name="function_value", min=-10, max=10)],
    )
    context_space = SimpleHypergrid(
        name="my_context_name",
        dimensions=[ContinuousDimension(name="y", min=-1, max=1)],
    )

    # The function value is to be minimized.
    optimization_problem = OptimizationProblem(
        parameter_space=input_space,
        objective_space=output_space,
        objectives=[Objective(name="function_value", minimize=True)],
        context_space=context_space,
    )

    n_samples = 100
    parameter_df = input_space.random_dataframe(n_samples)
    context_df = context_space.random_dataframe(n_samples)

    with pytest.raises(ValueError, match="Context required"):
        optimization_problem.construct_feature_dataframe(parameters_df=parameter_df)

    feature_df = optimization_problem.construct_feature_dataframe(
        parameters_df=parameter_df,
        context_df=context_df,
    )

    assert isinstance(feature_df, pd.DataFrame)
    assert feature_df.shape == (n_samples, 3)
    expected_columns = ['my_input_name.x', 'contains_context', 'my_context_name.y']
    assert (feature_df.columns == expected_columns).all()
    assert feature_df.contains_context.all()
def test_hierarchical_quadratic_cold_start(self):
    """Repeatedly run a fresh optimizer on the 'three_level_quadratic' function.

    Each of the 1000 restarts creates a new optimizer with the default
    configuration, feeds it 200 suggest/evaluate/register rounds, validates
    its optima and logs the optimum it found.
    """
    function_config = objective_function_config_store.get_config_by_name('three_level_quadratic')
    objective_function = ObjectiveFunctionFactory.create_objective_function(
        objective_function_config=function_config,
    )

    y_dimension = ContinuousDimension(name='y', min=-math.inf, max=math.inf)
    output_space = SimpleHypergrid(name="output", dimensions=[y_dimension])

    optimization_problem = OptimizationProblem(
        parameter_space=objective_function.parameter_space,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)],
    )

    num_restarts = 1000
    for restart_num in range(num_restarts):
        bayesian_optimizer = BayesianOptimizer(
            optimization_problem=optimization_problem,
            optimizer_config=bayesian_optimizer_config_store.default,
            logger=self.logger,
        )

        num_guided_samples = 200
        for i in range(num_guided_samples):
            suggested_params = bayesian_optimizer.suggest()
            y = objective_function.evaluate_point(suggested_params)
            self.logger.info(f"[{i}/{num_guided_samples}] {suggested_params}, y: {y}")

            bayesian_optimizer.register(suggested_params.to_dataframe(), y.to_dataframe())

        self.validate_optima(bayesian_optimizer)
        best_config_point, best_objective = bayesian_optimizer.optimum()
        self.logger.info(f"[{restart_num}/{num_restarts}] Optimum config: {best_config_point}, optimum objective: {best_objective}")
def __init__(self, objective_function_config: Point):
    """Create one nested-polynomial objective per requested output dimension.

    Outputs are named ``y0``, ``y1``, ... up to ``num_objectives``. All
    individual objectives are built from the same nested config object, whose
    polynomial seed is incremented in place before each one is created. The
    parameter space is taken from the first objective, and the default
    optimization problem minimizes every output.
    """
    assert objective_function_config in multi_objective_nested_polynomial_config_space
    ObjectiveFunctionBase.__init__(self, objective_function_config)

    num_objectives = objective_function_config.num_objectives
    nested_config = objective_function_config.nested_polynomial_objective_config
    self._nested_polynomial_objective_config = nested_config

    self._ordered_output_dimension_names = [f'y{i}' for i in range(num_objectives)]
    self._individual_objective_functions = KeyOrderedDict(
        ordered_keys=self._ordered_output_dimension_names,
        value_type=NestedPolynomialObjective,
    )

    # Create the required number of objective functions.
    for i in range(num_objectives):
        # The increment is applied to the shared config, so it accumulates
        # across iterations and the passed-in config is modified.
        nested_config.polynomial_objective_config.seed += i
        self._individual_objective_functions[i] = NestedPolynomialObjective(
            objective_function_config=nested_config,
        )

    self._parameter_space = self._individual_objective_functions[0].parameter_space

    output_dimensions = [
        ContinuousDimension(name=output_dim_name, min=-math.inf, max=math.inf)
        for output_dim_name in self._ordered_output_dimension_names
    ]
    self._output_space = SimpleHypergrid(name='output_space', dimensions=output_dimensions)

    self.default_optimization_problem = OptimizationProblem(
        parameter_space=self._parameter_space,
        objective_space=self._output_space,
        objectives=[
            Objective(name=name, minimize=True)
            for name in self._ordered_output_dimension_names
        ],
    )
def test_hierarchical_quadratic_cold_start(self):
    """Repeatedly cold-starts a BayesianOptimizer on MultilevelQuadratic.

    Every restart creates a fresh optimizer with the default config, runs
    200 suggest/evaluate/register rounds and logs the resulting optimum.
    """
    # Single unbounded objective 'y' that we want to minimize.
    #
    y_dimension = ContinuousDimension(name='y', min=-math.inf, max=math.inf)
    optimization_problem = OptimizationProblem(
        parameter_space=MultilevelQuadratic.CONFIG_SPACE,
        objective_space=SimpleHypergrid(name="output", dimensions=[y_dimension]),
        objectives=[Objective(name='y', minimize=True)]
    )

    num_restarts = 1000
    num_guided_samples = 200
    for restart_num in range(num_restarts):
        bayesian_optimizer = BayesianOptimizer(
            optimization_problem=optimization_problem,
            optimizer_config=BayesianOptimizerConfig.DEFAULT,
            logger=self.logger
        )

        for i in range(num_guided_samples):
            suggested_params = bayesian_optimizer.suggest()
            y = MultilevelQuadratic.evaluate(suggested_params)
            self.logger.info(f"[{i}/{num_guided_samples}] {suggested_params}, y: {y}")

            # Single-row dataframes: one column per suggested parameter, and one for the target.
            #
            single_row_params = {
                param_name: [param_value]
                for param_name, param_value in suggested_params
            }
            bayesian_optimizer.register(
                pd.DataFrame(single_row_params),
                pd.DataFrame({'y': [y]})
            )

        self.logger.info(f"[{restart_num}/{num_restarts}] Optimum: {bayesian_optimizer.optimum()}")
def setup_method(self, method):
    """Starts a local gRPC optimizer service and prepares the objects shared by the tests.

    Ports 50051 .. 50150 are tried in order; the first port on which the
    server can be created and started is kept in self.port. If the last
    attempt fails too, its exception is re-raised.

    :param method: the test method about to run (supplied by pytest, unused here).
    """
    self.logger = create_logger(self.__class__.__name__)

    # Start up the gRPC service. Try a bunch of times before giving up.
    #
    max_num_tries = 100
    first_port = 50051
    for num_tries, port in enumerate(range(first_port, first_port + max_num_tries), start=1):
        try:
            self.server = OptimizerServicesServer(port=port, num_threads=10)
            self.server.start()
            self.port = port
            break
        except Exception:
            # Only ordinary errors trigger a retry on the next port. KeyboardInterrupt and
            # SystemExit derive from BaseException and must propagate immediately instead
            # of being swallowed by up to 100 retries.
            #
            self.logger.info(f"Failed to create OptimizerMicroserviceServer on port {port}")
            if num_tries == max_num_tries:
                raise

    self.optimizer_service_channel = grpc.insecure_channel(f'localhost:{self.port}')
    self.bayesian_optimizer_factory = BayesianOptimizerFactory(
        grpc_channel=self.optimizer_service_channel,
        logger=self.logger
    )
    self.optimizer_monitor = OptimizerMonitor(
        grpc_channel=self.optimizer_service_channel,
        logger=self.logger
    )

    objective_function_config = objective_function_config_store.get_config_by_name('2d_quadratic_concave_up')
    self.objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config)

    self.optimization_problem = OptimizationProblem(
        parameter_space=self.objective_function.parameter_space,
        objective_space=self.objective_function.output_space,
        objectives=[Objective(name='y', minimize=True)]
    )
def __init__(self, objective_function_config: Point = None):
    """Sets up the spherical parameter space and the rectangular objective space.

    :param objective_function_config: forwarded to ObjectiveFunctionBase.__init__().
        The resulting self.objective_function_config must provide num_objectives,
        radius and minimize ("all", "none" or "some").

    The value of minimize selects the quadrant all theta angles are restricted to,
    and which objectives are flagged for minimization in self.minimize_mask.
    """
    ObjectiveFunctionBase.__init__(self, objective_function_config)

    config = self.objective_function_config
    self.num_objectives = config.num_objectives
    self.radius = config.radius
    self.minimize = config.minimize

    # Let's figure out the quadrant and which objectives to minimize.
    #
    if self.minimize == "all":
        # Second quadrant.
        #
        self.theta_min, self.theta_max = math.pi / 2, math.pi
        self.minimize_mask = [True] * self.num_objectives
    elif self.minimize == "none":
        # First quadrant.
        #
        self.theta_min, self.theta_max = 0, math.pi / 2
        self.minimize_mask = [False] * self.num_objectives
    elif self.minimize == "some":
        # Fourth quadrant. Minimizing the odd objectives means that y{N-1} needs no sign flip.
        #
        self.theta_min, self.theta_max = 1.5 * math.pi, 2 * math.pi
        self.minimize_mask = [(i % 2) == 1 for i in range(self.num_objectives)]
    else:
        assert False

    # Parameters: a radius followed by one angle per objective.
    #
    parameter_dimensions = [ContinuousDimension(name="radius", min=0, max=self.radius)]
    parameter_dimensions.extend(
        ContinuousDimension(name=f"theta{i}", min=self.theta_min, max=self.theta_max)
        for i in range(self.num_objectives)
    )
    self._parameter_space = SimpleHypergrid(
        name='spherical_coordinates',
        dimensions=parameter_dimensions
    )

    # Minimized objectives are bounded by [-radius, 0], all the others by [0, radius].
    #
    objective_dimensions = [
        ContinuousDimension(name=f"y{i}", min=-self.radius, max=0) if minimize
        else ContinuousDimension(name=f"y{i}", min=0, max=self.radius)
        for i, minimize in enumerate(self.minimize_mask)
    ]
    self._objective_space = SimpleHypergrid(
        name='rectangular_coordinates',
        dimensions=objective_dimensions
    )

    # TODO: add this to the ObjectiveFunctionBase interface.
    #
    objectives = [
        Objective(name=f'y{i}', minimize=minimize_objective)
        for i, minimize_objective in enumerate(self.minimize_mask)
    ]
    self.default_optimization_problem = OptimizationProblem(
        parameter_space=self._parameter_space,
        objective_space=self._objective_space,
        objectives=objectives
    )
def test_optimization_problem(self):
    """Round-trips an OptimizationProblem through the monitoring service encoder and decoder.

    The decoded problem must expose parameter, objective, context and feature
    spaces that accept random points drawn from the original ones (and vice
    versa), and its objectives must keep their names and minimize flags.
    """
    parameter_space = SimpleHypergrid(
        name="test",
        dimensions=[
            ContinuousDimension(name="x", min=0, max=1),
            CategoricalDimension(name="y", values=[1, 2, 3])
        ]
    )
    objective_space = SimpleHypergrid(
        name="z",
        dimensions=[
            ContinuousDimension(name="z", min=0, max=1),
            ContinuousDimension(name="z1", min=-1, max=1)
        ]
    )
    context_space = SimpleHypergrid(
        name="context_space",
        dimensions=[
            ContinuousDimension(name="x_c", min=0, max=1),
            CategoricalDimension(name="y_c", values=[1, 2, 3, 4, 6])
        ]
    )
    optimization_problem = OptimizationProblem(
        parameter_space=parameter_space,
        objective_space=objective_space,
        objectives=[
            Objective(name="z", minimize=True),
            Objective(name="z1", minimize=False)
        ],
        context_space=context_space
    )

    encoded_problem = OptimizerMonitoringServiceEncoder.encode_optimization_problem(optimization_problem)
    decoded_problem = OptimizerMonitoringServiceDecoder.decode_optimization_problem(encoded_problem)

    # Two spaces A and B are treated as equal when A contains B and B contains A. Both
    # directions are spot-checked with random points. The loops are deliberately kept
    # separate - one per space - for readability.
    #
    num_random_points = 1000

    # Parameter space.
    #
    for _ in range(num_random_points):
        assert decoded_problem.parameter_space.random() in parameter_space
        assert parameter_space.random() in decoded_problem.parameter_space

    # Objective space.
    #
    for _ in range(num_random_points):
        assert decoded_problem.objective_space.random() in objective_space
        assert objective_space.random() in decoded_problem.objective_space

    # Context space.
    #
    for _ in range(num_random_points):
        assert decoded_problem.context_space.random() in context_space
        assert context_space.random() in decoded_problem.context_space

    # Feature space.
    #
    for _ in range(num_random_points):
        assert decoded_problem.feature_space.random() in optimization_problem.feature_space
        assert optimization_problem.feature_space.random() in decoded_problem.feature_space

    # Objectives must come back in the same order, with the same names and directions.
    #
    print(decoded_problem.objectives)
    assert len(decoded_problem.objectives) == 2
    first_objective = decoded_problem.objectives[0]
    second_objective = decoded_problem.objectives[1]
    assert first_objective.name == "z"
    assert second_objective.name == "z1"
    assert first_objective.minimize
    assert not second_objective.minimize
def test_bayesian_optimizer_1d_nonconvex(self):
    """Optimizes a 1D non-convex function, first by minimizing it and then by maximizing its negation.

    In both passes the optimizer gets 40 suggest/evaluate/register rounds, after
    which the best registered objective - mapped back to the sign of the original
    function - must not exceed -5.5.
    """
    # Print the seed so that a failing run can be reproduced.
    seed = np.random.randint(1e6)
    print(seed)
    random.seed(seed)
    np.random.seed(seed)

    def f(x):
        # The target function. It does not depend on the optimization direction.
        return (6 * x - 2)**2 * np.sin(12 * x - 4)

    for minimize in (True, False):
        # When maximizing we feed the optimizer -f(x), so that both passes look for the same optimum.
        sign = 1 if minimize else -1

        # A single continuous input dimension between 0 and 1.
        input_space = SimpleHypergrid(
            name="input",
            dimensions=[ContinuousDimension(name="x", min=0, max=1)]
        )

        # The output space; we might not know the exact ranges.
        output_space = SimpleHypergrid(
            name="objective",
            dimensions=[ContinuousDimension(name="function_value", min=-10, max=10)]
        )

        optimization_problem = OptimizationProblem(
            parameter_space=input_space,
            objective_space=output_space,
            # The direction follows the current pass.
            objectives=[Objective(name="function_value", minimize=minimize)]
        )

        optimizer_config = bayesian_optimizer_config_store.default
        random_forest_config = optimizer_config.homogeneous_random_forest_regression_model_config
        random_forest_config.decision_tree_regression_model_config.n_new_samples_before_refit = 1
        random_forest_config.n_estimators = 20
        optimizer_config.experiment_designer_config.confidence_bound_utility_function_config.alpha = 0.1

        optimizer = BayesianOptimizer(optimization_problem, optimizer_config)

        for _ in range(40):
            # Ask for a suggestion; suggested values are dictionary-like, keyed by parameter name.
            suggested_value = optimizer.suggest()
            input_values_df = suggested_value.to_dataframe()

            # Evaluate the target function and hand the observation back to the optimizer.
            target_value = sign * f(suggested_value['x'])
            target_values_df = pd.DataFrame({'function_value': [target_value]})
            optimizer.register(input_values_df, target_values_df)

        best_config_point, best_objective = optimizer.optimum()
        print(f"Optimum config: {best_config_point}, optimum objective: {best_objective}")
        self.assertLessEqual(sign * best_objective['function_value'], -5.5)
def test_hierarchical_quadratic_cold_start_random_configs(self):
    """Cold-starts BayesianOptimizers built from randomly drawn optimizer configs.

    Each of the 200 restarts seeds a shared random.Random with the restart
    number, draws an optimizer config from the config store's parameter space,
    caps the expensive settings so that a run stays short, and then performs
    suggest/evaluate/register rounds on the 'three_level_quadratic' objective.
    """
    quadratic_config = objective_function_config_store.get_config_by_name('three_level_quadratic')
    objective_function = ObjectiveFunctionFactory.create_objective_function(
        objective_function_config=quadratic_config
    )

    # Single unbounded objective 'y' that we want to minimize.
    #
    y_dimension = ContinuousDimension(name='y', min=-math.inf, max=math.inf)
    optimization_problem = OptimizationProblem(
        parameter_space=objective_function.parameter_space,
        objective_space=SimpleHypergrid(name="output", dimensions=[y_dimension]),
        objectives=[Objective(name='y', minimize=True)]
    )

    random_state = random.Random()
    num_restarts = 200
    for restart_num in range(num_restarts):
        # Seeding with the restart number makes it easy to repeat a failed experiment.
        #
        random_state.seed(restart_num)
        bayesian_optimizer_config_store.parameter_space.random_state = random_state
        objective_function.parameter_space.random_state = random_state
        optimizer_config = bayesian_optimizer_config_store.parameter_space.random()

        # We want to cover a lot of different configurations, so each individual run
        # has to stay short.
        #
        optimizer_config.min_samples_required_for_guided_design_of_experiments = 50

        uses_random_forest = (
            optimizer_config.surrogate_model_implementation == HomogeneousRandomForestRegressionModel.__name__
        )
        if uses_random_forest:
            random_forest_config = optimizer_config.homogeneous_random_forest_regression_model_config
            random_forest_config.n_estimators = min(random_forest_config.n_estimators, 5)
            decision_tree_config = random_forest_config.decision_tree_regression_model_config
            decision_tree_config.min_samples_to_fit = 10
            decision_tree_config.n_new_samples_before_refit = 10

        experiment_designer_config = optimizer_config.experiment_designer_config
        if experiment_designer_config.numeric_optimizer_implementation == GlowWormSwarmOptimizer.__name__:
            experiment_designer_config.glow_worm_swarm_optimizer_config.num_iterations = 5

        self.logger.info(
            f"[Restart: {restart_num}/{num_restarts}] Creating a BayesianOptimimizer with the following config: "
        )
        self.logger.info(f"Optimizer config: {optimizer_config.to_json(indent=2)}")

        optimizer = BayesianOptimizer(
            optimization_problem=optimization_problem,
            optimizer_config=optimizer_config,
            logger=self.logger
        )

        num_guided_samples = optimizer_config.min_samples_required_for_guided_design_of_experiments + 50
        for i in range(num_guided_samples):
            suggested_params = optimizer.suggest()
            y = objective_function.evaluate_point(suggested_params)
            self.logger.info(
                f"[Restart: {restart_num}/{num_restarts}][Sample: {i}/{num_guided_samples}] {suggested_params}, y: {y}"
            )
            optimizer.register(suggested_params.to_dataframe(), y.to_dataframe())

        best_config_point, best_objective = optimizer.optimum()
        self.logger.info(
            f"[Restart: {restart_num}/{num_restarts}] Optimum config: {best_config_point}, optimum objective: {best_objective}"
        )
def test_hyperspheres(self, minimize, num_output_dimensions, num_points):
    """Uses a hypersphere to validate that ParetoFrontier can correctly identify pareto-optimal points.

    The idea is that we want to find a pareto frontier that optimizes the cartesian coordinates of points
    defined using random spherical coordinates.

    By setting the radius of some of the points to the radius of the hypersphere, we guarantee that they are
    non-dominated. Such points must appear on the pareto frontier, though it's quite possible that other
    non-dominated points from the interior of the sphere could appear as well. The intuition in 2D is that we can
    draw a secant between two neighboring pareto efficient points on the perimeter. Any point that is between that
    secant and the perimeter is not dominated and would thus be pareto efficient as well. (Actually even more
    points are pareto efficient, but this subset is easiest to explain in text).

    We want to test scenarios where:
        1) all objectives are maximized,
        2) all objectives are minimized,
        3) some objectives are maximized and some are minimized.

    We want to be able to do that for an arbitrary number of dimensions so as to extract maximum coverage from
    this simple test.

    How does the test work?
    -----------------------
    For N objectives we sample the following parameters:
        1. radius - distance of a point from the origin.
        2. theta0, theta1, ..., theta{N-1} - angles, all drawn from the same quadrant. Only theta0 .. theta{N-2}
           enter the objectives below; theta{N-1} is sampled but not used.

    And compute the following N objectives from them:
        y0      = radius * cos(theta0)
        y1      = radius * sin(theta0) * cos(theta1)
        y2      = radius * sin(theta0) * sin(theta1) * cos(theta2)
        ...
        y{N-2}  = radius * sin(theta0) * ... * sin(theta{N-3}) * cos(theta{N-2})
        y{N-1}  = radius * sin(theta0) * ... * sin(theta{N-3}) * sin(theta{N-2})
                                                                  ^ !! sin instead of cos !!

    1) Maximizing all objectives.
        To maximize all objectives we need them to be non-negative. In such a setup all points with
        r == sphere_radius will be pareto efficient, and we can assert that the computed pareto frontier
        contains them. This is guaranteed by keeping all angles in the first quadrant (0 .. pi/2), since both
        sin and cos are non-negative there, and thus so is their product.

    2) Minimizing all objectives.
        Similarly, to minimize all objectives we need them to be non-positive. In such a setup we know that all
        points with r == sphere_radius are pareto efficient and we can assert that they are returned by the
        computation. All objectives except for the last one consist of some number of sin factors and a single
        cos factor. In the second quadrant (pi/2 .. pi) cos is non-positive and sin is non-negative, so keeping
        all thetas there makes all objectives non-positive except for the last one, which has no cos factor
        (and which we simply flip manually).

    3) Maximizing some objectives while minimizing others.
        In the fourth quadrant (3 * pi / 2 .. 2 * pi) sin is non-positive and cos is non-negative. Objective
        y{i} (for i < N-1) has i sin factors and one cos factor, while y{N-1} has N-1 sin factors and no cos
        factor. Either way the sign of y{i} is (-1)**i: objectives with an odd index are non-positive and are
        minimized, the ones with an even index are non-negative and are maximized. No manual sign flip is needed.

    :param minimize: "all", "none" or "some" - selects one of the three scenarios above.
    :param num_output_dimensions: number of objectives (N).
    :param num_points: number of random points to generate.
    :return:
    """
    hypersphere_radius = 10

    # Let's figure out the quadrant and which objectives to minimize.
    #
    theta_min = None
    theta_max = None
    minimize_mask: List[bool] = []

    if minimize == "all":
        # Let's keep angles in second quadrant.
        #
        theta_min = math.pi / 2
        theta_max = math.pi
        minimize_mask = [True for _ in range(num_output_dimensions)]

    elif minimize == "none":
        # Let's keep all angles in the first quadrant.
        #
        theta_min = 0
        theta_max = math.pi / 2
        minimize_mask = [False for _ in range(num_output_dimensions)]

    elif minimize == "some":
        # Let's keep all angles in the fourth quadrant.
        #
        theta_min = 1.5 * math.pi
        theta_max = 2 * math.pi

        # Let's minimize odd ones, that way the y{N-1} doesn't require a sign flip.
        #
        minimize_mask = [(i % 2) == 1 for i in range(num_output_dimensions)]

    else:
        assert False, f"Unsupported value of minimize: {minimize!r}"

    # Let's put together the optimization problem.
    #
    parameter_dimensions = [ContinuousDimension(name="radius", min=0, max=hypersphere_radius)]
    for i in range(num_output_dimensions):
        parameter_dimensions.append(ContinuousDimension(name=f"theta{i}", min=theta_min, max=theta_max))

    parameter_space = SimpleHypergrid(
        name='spherical_coordinates',
        dimensions=parameter_dimensions
    )

    # Minimized objectives are non-positive, so their range is [-radius, 0]. Declaring [0, radius] for all
    # of them would put every minimized objective value outside of the objective space.
    #
    objective_space = SimpleHypergrid(
        name='rectangular_coordinates',
        dimensions=[
            ContinuousDimension(name=f"y{i}", min=-hypersphere_radius, max=0) if minimize_objective
            else ContinuousDimension(name=f"y{i}", min=0, max=hypersphere_radius)
            for i, minimize_objective in enumerate(minimize_mask)
        ]
    )

    optimization_problem = OptimizationProblem(
        parameter_space=parameter_space,
        objective_space=objective_space,
        objectives=[
            Objective(name=f'y{i}', minimize=minimize_objective)
            for i, minimize_objective in enumerate(minimize_mask)
        ]
    )

    random_params_df = optimization_problem.feature_space.random_dataframe(num_points)

    # Let's randomly subsample 10% of points in random_params_df and make those points pareto optimal.
    #
    optimal_points_index = random_params_df.sample(
        frac=0.1,
        replace=False,
        axis='index'
    ).index

    random_params_df.loc[optimal_points_index, ['spherical_coordinates.radius']] = hypersphere_radius

    # We can compute our objectives more efficiently, by maintaining a prefix of
    # r * sin(theta0) * ... * sin(theta{i-1})
    #
    prefix = random_params_df['spherical_coordinates.radius']
    objectives_df = pd.DataFrame()

    for i in range(num_output_dimensions - 1):
        objectives_df[f'y{i}'] = prefix * np.cos(random_params_df[f'spherical_coordinates.theta{i}'])
        prefix = prefix * np.sin(random_params_df[f'spherical_coordinates.theta{i}'])

    # Conveniently, by the time the loop exits, the prefix is the value of our last objective.
    #
    if minimize == "all":
        # Must flip the prefix first, since there was no negative cosine to do it for us.
        #
        objectives_df[f'y{num_output_dimensions - 1}'] = -prefix
    else:
        objectives_df[f'y{num_output_dimensions - 1}'] = prefix

    # Just as conveniently, we can double check all of our math by invoking Pythagoras. Basically:
    #
    #   assert y0**2 + y1**2 + ... == radius**2
    #
    # The difference has to be compared by absolute value: a one-sided '<' would also accept vectors
    # that are arbitrarily shorter than the radius.
    #
    squared_norms = np.power(objectives_df, 2).sum(axis=1)
    squared_radii = np.power(random_params_df["spherical_coordinates.radius"], 2)
    assert (np.abs(squared_norms - squared_radii) < 0.000001).all()

    # Just a few more sanity checks before we do the pareto computation.
    #
    if minimize == "all":
        assert (objectives_df <= 0).all().all()
    elif minimize == "none":
        assert (objectives_df >= 0).all().all()
    else:
        for column, minimize_column in zip(objectives_df, minimize_mask):
            if minimize_column:
                assert (objectives_df[column] <= 0).all()
            else:
                assert (objectives_df[column] >= 0).all()

    pareto_df = ParetoFrontier.compute_pareto(
        optimization_problem=optimization_problem,
        objectives_df=objectives_df
    )

    # We know that all of the points we placed on the surface of the sphere must be on the frontier.
    #
    assert optimal_points_index.difference(pareto_df.index).empty
    assert len(pareto_df.index) >= len(optimal_points_index)

    # If we flip all minimized objectives, we can assert on even more things, since after the flip
    # every objective is to be maximized.
    #
    for column, minimize_column in zip(objectives_df, minimize_mask):
        if minimize_column:
            objectives_df[column] = -objectives_df[column]
            pareto_df[column] = -pareto_df[column]

    non_pareto_index = objectives_df.index.difference(pareto_df.index)
    non_pareto_df = objectives_df.loc[non_pareto_index]
    for i, row in pareto_df.iterrows():
        # Now let's make sure that no point in pareto is dominated by any non-pareto point: every
        # non-pareto point must be strictly worse than this pareto point on at least one objective.
        #
        assert (non_pareto_df < row).any(axis=1).sum() == len(non_pareto_index)

        # Let's also make sure that no point on the pareto is dominated by any other point there.
        #
        other_rows = pareto_df.index.difference([i])
        assert (pareto_df.loc[other_rows] > row).all(axis=1).sum() == 0