Example #1
    def test_pareto_frontier_volume_simple(self):
        """A simple sanity test on the pareto frontier volume computations.
        """

        # Let's generate a pareto frontier in 2D. All points lie on the line y = 1 - x.
        x = np.linspace(start=0, stop=1, num=100)
        y = 1 - x
        pareto_df = pd.DataFrame({'x': x, 'y': y})
        optimization_problem = OptimizationProblem(
            parameter_space=None,
            objective_space=SimpleHypergrid(name='objectives',
                                            dimensions=[
                                                ContinuousDimension(name='x',
                                                                    min=0,
                                                                    max=1),
                                                ContinuousDimension(name='y',
                                                                    min=0,
                                                                    max=1)
                                            ]),
            objectives=[
                Objective(name='x', minimize=False),
                Objective(name='y', minimize=False)
            ])
        pareto_frontier = ParetoFrontier(optimization_problem, pareto_df)
        pareto_volume_estimator = pareto_frontier.approximate_pareto_volume(
            num_samples=1000000)
        lower_bound, upper_bound = pareto_volume_estimator.get_two_sided_confidence_interval_on_pareto_volume(
            alpha=0.05)
        print(lower_bound, upper_bound)
        assert 0.49 < lower_bound < upper_bound < 0.51
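The expected interval follows from geometry: the frontier y = 1 - x dominates exactly the triangle below it, whose area within the unit square is 0.5. As an independent cross-check, here is a minimal sketch (assuming only numpy; not part of the library) that recovers the same number by rejection sampling:

import numpy as np

# Estimate the area dominated by the frontier y = 1 - x inside the unit square.
rng = np.random.default_rng(seed=42)
samples = rng.uniform(low=0.0, high=1.0, size=(1_000_000, 2))

# A sample (x, y) is dominated by some frontier point iff it lies strictly
# below the line, i.e. x + y < 1, so this fraction estimates the pareto volume.
dominated_fraction = (samples.sum(axis=1) < 1.0).mean()
assert 0.49 < dominated_fraction < 0.51  # converges to 0.5 as samples grow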
Example #2
    def test_basic_functionality_on_2d_objective_space(self):
        """Basic sanity check. Mainly used to help us develop the API.
        """

        # Let's just create a bunch of random points, build a pareto frontier
        # and verify that the invariants hold.
        #
        parameter_space = SimpleHypergrid(
            name='params',
            dimensions=[
                ContinuousDimension(name='x1', min=0, max=10)
            ]
        )

        objective_space = SimpleHypergrid(
            name='objectives',
            dimensions=[
                ContinuousDimension(name='y1', min=0, max=10),
                ContinuousDimension(name='y2', min=0, max=10)
            ]
        )

        optimization_problem = OptimizationProblem(
            parameter_space=parameter_space,
            objective_space=objective_space,
            objectives=[
                Objective(name='y1', minimize=False),
                Objective(name='y2', minimize=False)
            ]
        )

        num_rows = 100000
        random_objectives_df = objective_space.random_dataframe(num_rows)

        # They don't match but they don't need to for this test.
        #
        random_params_df = parameter_space.random_dataframe(num_rows)

        pareto_frontier = ParetoFrontier(
            optimization_problem=optimization_problem,
            objectives_df=random_objectives_df,
            parameters_df=random_params_df
        )
        pareto_df = pareto_frontier.pareto_df

        non_pareto_index = random_objectives_df.index.difference(pareto_df.index)
        for i, row in pareto_df.iterrows():
            # Now let's make sure that no point in pareto is dominated by any non-pareto point.
            #
            assert (random_objectives_df.loc[non_pareto_index] < row).any(axis=1).sum() == len(non_pareto_index)

            # Let's also make sure that no point on the pareto is dominated by any other point there.
            #
            other_rows = pareto_df.index.difference([i])
            assert (pareto_df.loc[other_rows] > row).all(axis=1).sum() == 0
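The element-wise comparisons in these two asserts are easy to misread, so here is the same domination convention spelled out pointwise (a hypothetical helper for illustration, assuming all objectives are maximized):

import pandas as pd

def strictly_dominates(a: pd.Series, b: pd.Series) -> bool:
    # Convention exercised by the asserts above (and by the repeated-values
    # test in Example #5): a point falls off the frontier only if some other
    # point is strictly better in EVERY objective.
    return bool((a > b).all())

# First assert: every non-pareto point is strictly worse than the pareto row in
# at least one objective, i.e. no non-pareto point is >= it in all objectives.
# Second assert: no other pareto row strictly dominates this one.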
Example #3
    def setup_class(cls):
        """ Set's up all the objects needed to test the UtilityFunctionOptimizers

        To test the UtilityFunctionOptimizers we need to first construct:
            * an objective function for the model to approximate and its corresponding parameter and output spaces
            * an optimization problem
            * a regression model, then train it on some random parameters to the objective function
            * a utility function that utilizes the model
            * a pareto frontier over the random parameters

        And only then do we get to test our utility function optimizers. This is a lot of work, and a somewhat cleaner approach
        would be to simply create an instance of the BayesianOptimizer to do all of the above for us, but then we might not be able
        to test the utility function optimizers as thoroughly as we need to.

        :return:
        """
        global_values.declare_singletons()
        global_values.tracer = Tracer(actor_id=cls.__name__, thread_id=0)

        cls.logger = create_logger("TestUtilityFunctionOptimizers")

        cls.model_config = multi_objective_pass_through_model_config_store.default

        cls.model = MultiObjectivePassThroughModelForTesting(
            model_config=cls.model_config,
            logger=cls.logger
        )
        cls.objective_function = cls.model.objective_function
        cls.parameter_space = cls.objective_function.parameter_space
        cls.objective_space = cls.objective_function.output_space

        cls.optimization_problem = cls.objective_function.default_optimization_problem
        cls.utility_function_config = Point(utility_function_name="upper_confidence_bound_on_improvement", alpha=0.05)

        cls.utility_function = ConfidenceBoundUtilityFunction(
            function_config=cls.utility_function_config,
            surrogate_model=cls.model,
            minimize=cls.optimization_problem.objectives[0].minimize,
            logger=cls.logger
        )

        # To make the pareto frontier we have to generate some random points.
        #
        cls.parameters_df = cls.objective_function.parameter_space.random_dataframe(1000)
        cls.objectives_df = cls.objective_function.evaluate_dataframe(cls.parameters_df)

        cls.pareto_frontier = ParetoFrontier(
            optimization_problem=cls.optimization_problem,
            objectives_df=cls.objectives_df,
            parameters_df=cls.parameters_df
        )
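For context, here is a sketch of how these artifacts are then combined, mirroring the factory call pattern in Example #11 (RandomSearchOptimizer and its config store are taken from there; this is illustrative, not part of the setup):

# Sketch: build a utility function optimizer from the artifacts above and ask
# it for a suggestion, as Example #11 does.
utility_function_optimizer = UtilityFunctionOptimizerFactory.create_utility_function_optimizer(
    utility_function=cls.utility_function,
    optimizer_type_name=RandomSearchOptimizer.__name__,
    optimizer_config=random_search_optimizer_config_store.default,
    optimization_problem=cls.optimization_problem,
    pareto_frontier=cls.pareto_frontier,
    logger=cls.logger
)
suggestion = utility_function_optimizer.suggest()
assert suggestion in cls.optimization_problem.parameter_space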
Example #4
    def test_pareto_shape(self, function_config_name):
        """Tests if the pareto frontier has the expected shape.

        For no phase difference, we would expect the pareto frontier to be a single point.
        For a phase difference of pi / 2 we would expect the pareto frontier to be on a quarter circle.
        For a phase difference of pi we would expect the pareto frontier to be on a diagonal.
        """

        function_config = multi_objective_enveloped_waves_config_store.get_config_by_name(function_config_name)
        objective_function = MultiObjectiveEnvelopedWaves(function_config)

        optimization_problem = OptimizationProblem(
            parameter_space=objective_function.parameter_space,
            objective_space=objective_function.output_space,
            objectives=[Objective(name=dim_name, minimize=False) for dim_name in objective_function.output_space.dimension_names]
        )

        # Let's create a meshgrid of all params.
        # TODO: add this as a function in Hypergrids

        num_points = 100 if function_config_name != "pi_phase_difference" else 10
        linspaces = [dimension.linspace(num_points) for dimension in objective_function.parameter_space.dimensions]
        meshgrids = np.meshgrid(*linspaces)
        flat_meshgrids = [meshgrid.flatten() for meshgrid in meshgrids]
        params_df = pd.DataFrame({
            dim_name: flat_meshgrid
            for dim_name, flat_meshgrid
            in zip(objective_function.parameter_space.dimension_names, flat_meshgrids)
        })
        objectives_df = objective_function.evaluate_dataframe(params_df)
        pareto_frontier = ParetoFrontier(optimization_problem=optimization_problem, objectives_df=objectives_df, parameters_df=params_df)
        pareto_df = pareto_frontier.pareto_df

        if function_config_name == "no_phase_difference":
            # Let's assert that the optimum is close to 3 and that all selected params are close to pi / 2.
            assert len(pareto_df.index) == 1
            for objective in optimization_problem.objectives:
                assert abs(pareto_df[objective.name].iloc[0] - 3) < 0.001

            optimal_params_df = params_df.iloc[pareto_df.index]
            for param_name in objective_function.parameter_space.dimension_names:
                assert abs(optimal_params_df[param_name].iloc[0] - math.pi / 2) < 0.02

        if function_config_name == "half_pi_phase_difference":
            expected_radius = 3
            pareto_df['radius'] = np.sqrt(pareto_df['y0'] ** 2 + pareto_df['y1'] ** 2)
            pareto_df['error'] = pareto_df['radius'] - expected_radius
            assert (np.abs(pareto_df['error']) < 0.01).all()

        if function_config_name == "pi_phase_difference":
            # We expect that the absolute values of our objectives will be nearly identical.
            #
            assert (np.abs(pareto_df['y0'] + pareto_df['y1']) < 0.01).all()
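The half_pi case rests on a geometric fact worth making explicit: points of equal radius on a quarter circle are mutually non-dominated when both coordinates are maximized, since moving along the arc improves one coordinate only at the expense of the other. A self-contained numpy check of that claim:

import numpy as np

# Points on a quarter circle of radius 3 (the expected_radius above).
theta = np.linspace(0.0, np.pi / 2, num=100)
y0, y1 = 3 * np.cos(theta), 3 * np.sin(theta)

# No point is strictly better than another in both coordinates: along the arc,
# y0 strictly decreases exactly as y1 strictly increases.
for i in range(len(theta)):
    assert not np.any((y0 > y0[i]) & (y1 > y1[i]))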
Example #5
    def test_repeated_values(self):
        """Validates that the algorithm does its job in the presence of repeated values.

        :return:
        """

        optimization_problem = OptimizationProblem(
            parameter_space=None,
            objective_space=SimpleHypergrid(
                name="objectives",
                dimensions=[
                    ContinuousDimension(name='y1', min=0, max=5),
                    ContinuousDimension(name='y2', min=0, max=5)
                ]
            ),
            objectives=[
                Objective(name='y1', minimize=False),
                Objective(name='y2', minimize=False)
            ]
        )

        expected_pareto_df = pd.DataFrame(
            [
                [1, 2],
                [1, 2],
                [2, 1],
                [0.5, 2],
                [1, 1],
                [2, 0.5]
            ],
            columns=['y1', 'y2']
        )

        dominated_df = pd.DataFrame(
            [
                [0.5, 0.5],
                [0.5, 1],
                [0.5, 1.5],
                [1, 0.5],
                [1.5, 0.5]
            ],
            columns=['y1', 'y2']
        )

        all_objectives_df = pd.concat([dominated_df, expected_pareto_df])
        pareto_frontier = ParetoFrontier(
            optimization_problem,
            objectives_df=all_objectives_df,
            parameters_df=pd.DataFrame(index=all_objectives_df.index)
        )
        computed_pareto_df = pareto_frontier.pareto_df
        assert computed_pareto_df.sort_values(by=['y1', 'y2']).equals(expected_pareto_df.sort_values(by=['y1', 'y2']))
Example #6
    def _prepare_dummy_model_based_test_artifacts(self, dummy_model_config, logger):
        """Prepares all the artifacts we need to create and run a utility function optimizer.

        I chose to create them here rather than in setup_class, to avoid unnecessarily creating all possible combinations for all
        possible tests. It's easier and cheaper to produce these artifacts just in time, rather than upfront.

        I suspect that pytest has functionality to accomplish just this, but I haven't found it yet.

        We need to produce:
        * an optimization problem
        * a model
        * a utility function
        * pareto frontier
        """
        model = MultiObjectivePassThroughModelForTesting(model_config=dummy_model_config, logger=logger)
        objective_function = model.objective_function
        optimization_problem = objective_function.default_optimization_problem

        # Let's create the pareto frontier.
        #
        params_df = objective_function.parameter_space.random_dataframe(1000)
        objectives_df = objective_function.evaluate_dataframe(params_df)
        pareto_frontier = ParetoFrontier(
            optimization_problem=optimization_problem,
            objectives_df=objectives_df,
            parameters_df=params_df
        )

        if len(optimization_problem.objectives) == 1:
            utility_function_config = Point(utility_function_name="upper_confidence_bound_on_improvement", alpha=0.05)
            utility_function = ConfidenceBoundUtilityFunction(
                function_config=utility_function_config,
                surrogate_model=model,
                minimize=optimization_problem.objectives[0].minimize,
                logger=logger
            )
        else:
            utility_function_config = multi_objective_probability_of_improvement_utility_function_config_store.default
            utility_function = MultiObjectiveProbabilityOfImprovementUtilityFunction(
                function_config=utility_function_config,
                pareto_frontier=pareto_frontier,
                surrogate_model=model,
                logger=logger
            )
        return optimization_problem, model, utility_function, pareto_frontier
Example #7
    def test_hyperspheres(self, minimize, num_output_dimensions, num_points):
        """Uses a hypersphere to validate that ParetoFrontier can correctly identify pareto-optimal points."""


        hypersphere_radius = 10

        objective_function_config = Point(
            implementation=Hypersphere.__name__,
            hypersphere_config=Point(
                num_objectives=num_output_dimensions,
                minimize=minimize,
                radius=hypersphere_radius
            )
        )

        objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)
        optimization_problem = objective_function.default_optimization_problem
        random_params_df = optimization_problem.parameter_space.random_dataframe(num_points)

        # Let's randomly subsample 10% of points in random_params_df and make those points pareto optimal.
        #
        optimal_points_index = random_params_df.sample(
            frac=0.1,
            replace=False,
            axis='index'
        ).index

        random_params_df.loc[optimal_points_index, ['radius']] = hypersphere_radius
        objectives_df = objective_function.evaluate_dataframe(dataframe=random_params_df)

        # Conveniently, we can double check all of our math by invoking Pythagoras. Basically:
        #
        #   assert y0**2 + y1**2 + ... == radius**2
        #
        assert (np.abs(np.power(objectives_df, 2).sum(axis=1) - np.power(random_params_df["radius"], 2)) < 0.000001).all()

        # Just a few more sanity checks before we do the pareto computation.
        #
        if minimize == "all":
            assert (objectives_df <= 0).all().all()
        elif minimize == "none":
            assert (objectives_df >= 0).all().all()
        else:
            for column, minimize_column in zip(objectives_df, objective_function.minimize_mask):
                if minimize_column:
                    assert (objectives_df[column] <= 0).all()
                else:
                    assert (objectives_df[column] >= 0).all()


        pareto_frontier = ParetoFrontier(
            optimization_problem=optimization_problem,
            objectives_df=objectives_df,
            parameters_df=random_params_df
        )
        pareto_df = pareto_frontier.pareto_df

        # We know that all of the pareto efficient points must be on the frontier.
        #
        assert optimal_points_index.difference(pareto_df.index.intersection(optimal_points_index)).empty
        assert len(pareto_df.index) >= len(optimal_points_index)

        # If we flip all minimized objectives, we can assert on even more things.
        #
        for column, minimize_column in zip(objectives_df, objective_function.minimize_mask):
            if minimize_column:
                objectives_df[column] = -objectives_df[column]
                pareto_df[column] = -pareto_df[column]

        non_pareto_index = objectives_df.index.difference(pareto_df.index)
        for i, row in pareto_df.iterrows():
            # Now let's make sure that no point in pareto is dominated by any non-pareto point.
            #
            assert (objectives_df.loc[non_pareto_index] < row).any(axis=1).sum() == len(non_pareto_index)

            # Let's also make sure that no point on the pareto is dominated by any other point there.
            #
            other_rows = pareto_df.index.difference([i])
            assert (pareto_df.loc[other_rows] > row).all(axis=1).sum() == 0
Example #8
    def test_pareto_frontier_volume_on_hyperspheres(self, minimize, num_dimensions):
        """Uses a known formula for the volume of the hyperspheres to validate the accuracy of the pareto frontier estimate.

        :return:
        """
        hypersphere_radius = 10
        inscribed_hypersphere_radius = 7  # For computing lower bound on volume

        # In order to validate the estimates, we must know the allowable upper and lower bounds.
        # We know that the estimate should not be higher than the volume of the n-ball (a ball in n dimensions).
        # We can also come up with a lower bound by computing the volume of a slightly smaller ball inscribed
        # in the hypersphere. Note that the volume of an n-ball can be computed recursively, so we keep track
        # of n-ball volumes in lower dimensions.
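        # For reference, the recursion used below is V_n(r) = V_{n-2}(r) * 2 * pi * r^2 / n,
        # which agrees with the closed form V_n(r) = pi^(n/2) * r^n / Gamma(n/2 + 1).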

        upper_bounds_on_sphere_volume_by_num_dimensions = {}
        lower_bounds_on_sphere_volume_by_num_dimensions = {}

        # Compute the base cases for the recursion.
        #
        upper_bounds_on_sphere_volume_by_num_dimensions[2] = np.pi * (hypersphere_radius ** 2)
        upper_bounds_on_sphere_volume_by_num_dimensions[3] = (4 / 3) * np.pi * (hypersphere_radius ** 3)

        lower_bounds_on_sphere_volume_by_num_dimensions[2] = np.pi * (inscribed_hypersphere_radius ** 2)
        lower_bounds_on_sphere_volume_by_num_dimensions[3] = (4 / 3) * np.pi * (inscribed_hypersphere_radius ** 3)

        # Compute the recursive values.
        #
        for n in range(4, num_dimensions + 1):
            upper_bounds_on_sphere_volume_by_num_dimensions[n] = upper_bounds_on_sphere_volume_by_num_dimensions[n-2] * 2 * np.pi * (hypersphere_radius ** 2) / n
            lower_bounds_on_sphere_volume_by_num_dimensions[n] = lower_bounds_on_sphere_volume_by_num_dimensions[n-2] * 2 * np.pi * (inscribed_hypersphere_radius ** 2) / n

        objective_function_config = Point(
            implementation=Hypersphere.__name__,
            hypersphere_config=Point(
                num_objectives=num_dimensions,
                minimize=minimize,
                radius=hypersphere_radius
            )
        )
        objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config)
        parameter_space = objective_function.parameter_space

        num_points = max(4, num_dimensions)
        linspaces = []

        for dimension in parameter_space.dimensions:
            if dimension.name == 'radius':
                linspaces.append(np.array([hypersphere_radius]))
            else:
                linspaces.append(dimension.linspace(num_points))
        meshgrids = np.meshgrid(*linspaces)
        reshaped_meshgrids = [meshgrid.reshape(-1) for meshgrid in meshgrids]

        params_df = pd.DataFrame({
            dim_name: reshaped_meshgrids[i]
            for i, dim_name
            in enumerate(parameter_space.dimension_names)
        })

        objectives_df = objective_function.evaluate_dataframe(params_df)

        pareto_frontier = ParetoFrontier(
            optimization_problem=objective_function.default_optimization_problem,
            objectives_df=objectives_df,
            parameters_df=params_df
        )
        print("Num points in pareto frontier: ", len(objectives_df.index))
        assert len(pareto_frontier.pareto_df.index) == len(objectives_df.index)
        pareto_volume_estimator = pareto_frontier.approximate_pareto_volume(num_samples=1000000)
        ci_lower_bound, ci_upper_bound = pareto_volume_estimator.get_two_sided_confidence_interval_on_pareto_volume(alpha=0.05)

        lower_bound_on_pareto_volume = lower_bounds_on_sphere_volume_by_num_dimensions[num_dimensions] / (2**num_dimensions)
        upper_bound_on_pareto_volume = upper_bounds_on_sphere_volume_by_num_dimensions[num_dimensions] / (2**num_dimensions)
        print("True bounds:", lower_bound_on_pareto_volume, upper_bound_on_pareto_volume)
        print("CI bounds: ", ci_lower_bound, ci_upper_bound)
        assert lower_bound_on_pareto_volume <= ci_lower_bound <= ci_upper_bound <= upper_bound_on_pareto_volume
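The recursive bounds can also be cross-checked against the closed-form n-ball volume. A minimal sketch using only the standard library (n_ball_volume is a hypothetical helper, not part of the test suite):

import math

def n_ball_volume(n: int, radius: float) -> float:
    """Closed-form n-ball volume: pi^(n/2) * r^n / Gamma(n/2 + 1)."""
    return math.pi ** (n / 2) * radius ** n / math.gamma(n / 2 + 1)

# Matches the base cases hard-coded in the test above.
assert abs(n_ball_volume(2, 10) - math.pi * 10 ** 2) < 1e-6
assert abs(n_ball_volume(3, 10) - (4 / 3) * math.pi * 10 ** 3) < 1e-6

# The objectives live in a single orthant, which is why the test divides the
# full ball volume by 2 ** num_dimensions to bound the pareto volume.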
Example #9
    def __init__(self,
                 optimization_problem: OptimizationProblem,
                 optimizer_config: Point,
                 logger=None):
        if logger is None:
            logger = create_logger("BayesianOptimizer")
        self.logger = logger

        # Let's initialize the optimizer.
        #
        OptimizerBase.__init__(self, optimization_problem)

        assert not optimization_problem.objective_space.is_hierarchical(), "Not supported."
        assert optimizer_config in bayesian_optimizer_config_store.parameter_space, "Invalid config."

        self.surrogate_model_output_space = optimization_problem.objective_space
        self.optimizer_config = optimizer_config
        self.pareto_frontier: ParetoFrontier = ParetoFrontier(
            optimization_problem=self.optimization_problem, objectives_df=None)

        # Now let's put together the surrogate model.
        #
        assert self.optimizer_config.surrogate_model_implementation in (
            HomogeneousRandomForestRegressionModel.__name__,
            MultiObjectiveHomogeneousRandomForest.__name__)

        # Note that even if the user requested a HomogeneousRandomForestRegressionModel, we still create a MultiObjectiveRegressionModel
        # with just a single RandomForest inside it. This means we have to maintain only a single interface.
        #
        self.surrogate_model: MultiObjectiveRegressionModel = MultiObjectiveHomogeneousRandomForest(
            model_config=self.optimizer_config.homogeneous_random_forest_regression_model_config,
            input_space=self.optimization_problem.feature_space,
            output_space=self.surrogate_model_output_space,
            logger=self.logger)

        # Now let's put together the experiment designer that will suggest parameters for each experiment.
        #
        assert self.optimizer_config.experiment_designer_implementation == ExperimentDesigner.__name__
        self.experiment_designer = ExperimentDesigner(
            designer_config=self.optimizer_config.experiment_designer_config,
            optimization_problem=self.optimization_problem,
            pareto_frontier=self.pareto_frontier,
            surrogate_model=self.surrogate_model,
            logger=self.logger)

        self._optimizer_convergence_state = BayesianOptimizerConvergenceState(
            surrogate_model_fit_state=self.surrogate_model.fit_state)

        # Also let's make sure we have the dataframes we need for the surrogate model.
        #
        self._parameter_names = [
            dimension.name for dimension in
            self.optimization_problem.parameter_space.dimensions
        ]
        self._parameter_names_set = set(self._parameter_names)

        self._context_names = ([
            dimension.name
            for dimension in self.optimization_problem.context_space.dimensions
        ] if self.optimization_problem.context_space else [])
        self._context_names_set = set(self._context_names)

        self._target_names = [
            dimension.name for dimension in
            self.optimization_problem.objective_space.dimensions
        ]
        self._target_names_set = set(self._target_names)

        self._parameter_values_df = pd.DataFrame(columns=self._parameter_names)
        self._context_values_df = pd.DataFrame(columns=self._context_names)
        self._target_values_df = pd.DataFrame(columns=self._target_names)
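For completeness, a sketch of how this constructor is typically invoked (assuming, as with the other config stores in these examples, that bayesian_optimizer_config_store exposes a .default config; the assert above only requires membership in its parameter_space):

# Sketch only: construct the optimizer with a default config.
optimizer = BayesianOptimizer(
    optimization_problem=optimization_problem,
    optimizer_config=bayesian_optimizer_config_store.default
)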
Example #10
    def test_hyperspheres(self, minimize, num_output_dimensions, num_points):
        """Uses a hypersphere to validate that ParetoFrontier can correctly identify pareto-optimal points.


        The idea is that we want to find a pareto frontier that optimizes the cartesian coordinates of points defined using random
        spherical coordinates.

        By setting the radius of some of the points to the radius of the hypersphere, we guarantee that they are non-dominated.
        Such points must appear on the pareto frontier, though it's quite possible that other non-dominated points from the interior
        of the sphere could appear as well. The intuition in 2D is that we can draw a secant between two neighboring pareto efficient
        points on the perimeter. Any point that is between that secant and the perimeter is not dominated and would thus be pareto
        efficient as well. (Actually even more points are pareto efficient, but this subset is easiest to explain in text).


        We want to test scenarios where:
            1) all objectives are maximized,
            2) all objectives are minimized,
            3) some objectives are maximized and some are minimized.

        We want to be able to do that for an arbitrary number of dimensions so as to extract maximum coverage from this simple test.


        How the test works
        -------------------
        For N objectives we will specify the following parameters:
            1. radius - distance of a point from origin.
            2. theta0, theta1, ..., theta{i}, ..., theta{N-1} - the angle between the radius segment and the hyperplane containing
                unit vectors along y0, y1, ..., y{i-1}


        And the following N objectives that are computed from parameters:
            y0      = radius * cos(theta0)
            y1      = radius * sin(theta0) * cos(theta1)
            y2      = radius * sin(theta0) * sin(theta1) * cos(theta2)
            y3      = radius * sin(theta0) * sin(theta1) * sin(theta2) * cos(theta3)
            ...
            y{N-2}  = radius * sin(theta0) * sin(theta1) * ... * sin(theta{N-2}) * cos(theta{N-1})
            y{N-1}  = radius * sin(theta0) * sin(theta1) * ... * sin(theta{N-2}) * sin(theta{N-1})
                                                                                    ^ !! sin instead of cos !!

        1) Maximizing all objectives.
            To maximize all objectives we need them to be non-negative. In such a setup all points with r == sphere_radius
            will be pareto efficient, and we can assert that the computed pareto frontier contains them.

            This can be guaranteed by keeping all angles theta in the first quadrant (0 .. pi/2), since both sin and cos are
            positive there, and thus their product is too.

        2) Minimizing all objectives.
            Similarly, to minimize all objectives we need them to be non-positive. In such a setup we know that all points with
            r == sphere_radius are pareto efficient and we can assert that they are returned in the computation.

            We observe that all objectives except for the last one contain some number of sin factors and a single cos factor.
            Cosine is guaranteed to be negative in the second quadrant (pi/2 .. pi) and sine is guaranteed to be positive there.
            So keeping all thetas in the range [pi/2 .. pi] makes all objectives negative except for the last one (which we can
            simply flip manually).

        3) Maximizing some objectives while minimizing others.
            We can take advantage of the fact that every second objective has an odd number of sin factors, whilst the rest
            have an even number (again, except for the last one). So if we keep all the sin factors negative, and all the cos
            factors positive, we get a neat situation of alternating objectives' signs.

            This is true in the fourth quadrant (3 * pi / 2 .. 2 * pi), where sin values are negative, and cos values are positive.

            The last objective - y{N-1} - will have N negative factors, so it will be positive if (N % 2) == 0 and negative otherwise.
            In other words:
                if (N % 2) == 0:
                    maximize y{N-1}
                else:
                    minimize y{N-1}


        :param self:
        :return:
        """
        hypersphere_radius = 10

        # Let's figure out the quadrant and which objectives to minimize.
        #
        theta_min = None
        theta_max = None
        minimize_mask: List[bool] = []

        if minimize == "all":
            # Let's keep angles in second quadrant.
            #
            theta_min = math.pi / 2
            theta_max = math.pi
            minimize_mask = [True for _ in range(num_output_dimensions)]

        elif minimize == "none":
            # Let's keep all angles in the first quadrant.
            #
            theta_min = 0
            theta_max = math.pi / 2
            minimize_mask = [False for _ in range(num_output_dimensions)]

        elif minimize == "some":
            # Let's keep all angles in the fourth quadrant.
            #
            theta_min = 1.5 * math.pi
            theta_max = 2 * math.pi

            # Let's minimize the odd ones; that way y{N-1} doesn't require a sign flip.
            #
            minimize_mask = [(i % 2) == 1 for i in range(num_output_dimensions)]

        else:
            assert False

        # Let's put together the optimization problem.
        #
        parameter_dimensions = [ContinuousDimension(name="radius", min=0, max=hypersphere_radius)]
        for i in range(num_output_dimensions):
            parameter_dimensions.append(ContinuousDimension(name=f"theta{i}", min=theta_min, max=theta_max))

        parameter_space = SimpleHypergrid(
            name='spherical_coordinates',
            dimensions=parameter_dimensions
        )

        objective_space = SimpleHypergrid(
            name='rectangular_coordinates',
            dimensions=[
                ContinuousDimension(name=f"y{i}", min=0, max=hypersphere_radius)
                for i in range(num_output_dimensions)
            ]
        )

        optimization_problem = OptimizationProblem(
            parameter_space=parameter_space,
            objective_space=objective_space,
            objectives=[Objective(name=f'y{i}', minimize=minimize_objective) for i, minimize_objective in enumerate(minimize_mask)]
        )

        random_params_df = optimization_problem.feature_space.random_dataframe(num_points)

        # Let's randomly subsample 10% of points in random_params_df and make those points pareto optimal.
        #
        optimal_points_index = random_params_df.sample(
            frac=0.1,
            replace=False,
            axis='index'
        ).index

        random_params_df.loc[optimal_points_index, ['spherical_coordinates.radius']] = hypersphere_radius

        # We can compute our objectives more efficiently, by maintaining a prefix of r * sin(theta0) * ... * sin(theta{i-1})
        #
        prefix = random_params_df['spherical_coordinates.radius']
        objectives_df = pd.DataFrame()

        for i in range(num_output_dimensions-1):
            objectives_df[f'y{i}'] = prefix * np.cos(random_params_df[f'spherical_coordinates.theta{i}'])
            prefix = prefix * np.sin(random_params_df[f'spherical_coordinates.theta{i}'])

        # Conveniently, by the time the loop exits, the prefix is the value of our last objective.
        #
        if minimize == "all":
            # Must flip the prefix first, since there was no negative cosine to do it for us.
            #
            objectives_df[f'y{num_output_dimensions-1}'] = -prefix
        else:
            objectives_df[f'y{num_output_dimensions - 1}'] = prefix


        # Just as conveniently, we can double check all of our math by invoking Pythagoras. Basically:
        #
        #   assert y0**2 + y1**2 + ... == radius**2
        #
        assert (np.abs(np.power(objectives_df, 2).sum(axis=1) - np.power(random_params_df["spherical_coordinates.radius"], 2)) < 0.000001).all()

        # Just a few more sanity checks before we do the pareto computation.
        #
        if minimize == "all":
            assert (objectives_df <= 0).all().all()
        elif minimize == "none":
            assert (objectives_df >= 0).all().all()
        else:
            for column, minimize_column in zip(objectives_df, minimize_mask):
                if minimize_column:
                    assert (objectives_df[column] <= 0).all()
                else:
                    assert (objectives_df[column] >= 0).all()


        pareto_df = ParetoFrontier.compute_pareto(
            optimization_problem=optimization_problem,
            objectives_df=objectives_df
        )

        # We know that all of the pareto efficient points must be on the frontier.
        #
        assert optimal_points_index.difference(pareto_df.index.intersection(optimal_points_index)).empty
        assert len(pareto_df.index) >= len(optimal_points_index)

        # If we flip all minimized objectives, we can assert on even more things.
        #
        for column, minimize_column in zip(objectives_df, minimize_mask):
            if minimize_column:
                objectives_df[column] = -objectives_df[column]
                pareto_df[column] = -pareto_df[column]

        non_pareto_index = objectives_df.index.difference(pareto_df.index)
        for i, row in pareto_df.iterrows():
            # Now let's make sure that no point in pareto is dominated by any non-pareto point.
            #
            assert (objectives_df.loc[non_pareto_index] < row).any(axis=1).sum() == len(non_pareto_index)

            # Let's also make sure that no point on the pareto is dominated by any other point there.
            #
            other_rows = pareto_df.index.difference([i])
            assert (pareto_df.loc[other_rows] > row).all(axis=1).sum() == 0
Example #11
    def test_optimizers_against_untrained_models(self, objective_function_config_name, utility_function_type_name, utility_function_optimizer_type_name):
        """Tests that the utility function optimizers throw appropriate exceptions when the utility function cannot be evaluated.

        :return:
        """
        self.logger.info(f"Creating test artifacts for objective function: {objective_function_config_name}, utility_function: {utility_function_optimizer_type_name}, optimizer: {utility_function_optimizer_type_name}.")
        model_config = homogeneous_random_forest_config_store.default
        objective_function_config = objective_function_config_store.get_config_by_name(objective_function_config_name)
        objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)
        optimization_problem = objective_function.default_optimization_problem

        model = MultiObjectiveHomogeneousRandomForest(
            model_config=model_config,
            input_space=optimization_problem.feature_space,
            output_space=optimization_problem.objective_space,
            logger=self.logger
        )
        pareto_frontier = ParetoFrontier(optimization_problem=optimization_problem)

        if utility_function_type_name == ConfidenceBoundUtilityFunction.__name__:
            utility_function_config = Point(utility_function_name="upper_confidence_bound_on_improvement", alpha=0.05)
            utility_function = ConfidenceBoundUtilityFunction(
                function_config=utility_function_config,
                surrogate_model=model,
                minimize=optimization_problem.objectives[0].minimize,
                logger=self.logger
            )
        elif utility_function_type_name == MultiObjectiveProbabilityOfImprovementUtilityFunction.__name__:
            utility_function_config = multi_objective_probability_of_improvement_utility_function_config_store.default
            utility_function = MultiObjectiveProbabilityOfImprovementUtilityFunction(
                function_config=utility_function_config,
                pareto_frontier=pareto_frontier,
                surrogate_model=model,
                logger=self.logger
            )
        else:
            assert False

        if utility_function_optimizer_type_name == RandomSearchOptimizer.__name__:
            utility_function_optimizer_config = random_search_optimizer_config_store.default
        elif utility_function_optimizer_type_name == GlowWormSwarmOptimizer.__name__:
            utility_function_optimizer_config = glow_worm_swarm_optimizer_config_store.default
        elif utility_function_optimizer_type_name == RandomNearIncumbentOptimizer.__name__:
            utility_function_optimizer_config = random_near_incumbent_optimizer_config_store.default
        else:
            assert False, f"Unknown utility_function_optimizer_type_name: {utility_function_optimizer_type_name}"

        utility_function_optimizer = UtilityFunctionOptimizerFactory.create_utility_function_optimizer(
            utility_function=utility_function,
            optimizer_type_name=utility_function_optimizer_type_name,
            optimizer_config=utility_function_optimizer_config,
            optimization_problem=optimization_problem,
            pareto_frontier=pareto_frontier,
            logger=self.logger
        )

        assert not model.trained

        self.logger.info("Asserting the optimizer is throwing appropriate exceptions.")
        num_failed_suggestions = 3
        for i in range(num_failed_suggestions):
            with pytest.raises(expected_exception=UnableToProduceGuidedSuggestionException):
                utility_function_optimizer.suggest()
            self.logger.info(f"[{i+1}/{num_failed_suggestions}] worked.")


        # Now let's train the model a bit and make sure that we can produce the suggestions afterwards
        #
        random_params_df = optimization_problem.parameter_space.random_dataframe(1000)
        objectives_df = objective_function.evaluate_dataframe(random_params_df)
        features_df = optimization_problem.construct_feature_dataframe(parameters_df=random_params_df)

        self.logger.info("Training the model")
        model.fit(features_df=features_df, targets_df=objectives_df, iteration_number=1000)
        assert model.trained
        self.logger.info("Model trained.")

        self.logger.info("Updating pareto.")
        pareto_frontier.update_pareto(objectives_df=objectives_df, parameters_df=random_params_df)
        self.logger.info("Pareto updated.")

        self.logger.info("Asserting suggestions work.")
        num_successful_suggestions = 3
        for i in range(num_successful_suggestions):
            suggestion = utility_function_optimizer.suggest()
            assert suggestion in optimization_problem.parameter_space
            self.logger.info(f"[{i+1}/{num_successful_suggestions}] successfully produced suggestion: {suggestion}")

        self.logger.info(f"Done testing. Objective function: {objective_function_config_name}, utility_function: {utility_function_optimizer_type_name}, optimizer: {utility_function_optimizer_type_name}.")