示例#1
0
    def test_optimization_problem_none_context(self):
        """Round-trip a context-free optimization problem through the encoder and decoder.

        The problem is built without a context space, encoded with
        OptimizerServiceEncoder and decoded with OptimizerServiceDecoder.
        The decoded problem must report ``context_space is None``, and random
        points drawn from each decoded space must lie in the corresponding
        original space (and the other way around).
        """
        input_dimensions = [
            ContinuousDimension(name="x", min=0, max=1),
            OrdinalDimension(name="y", ordered_values=[1, 2, 3, 5, 10]),
            CategoricalDimension(name="y2", values=[True, False]),
        ]
        parameter_space = SimpleHypergrid(name="test", dimensions=input_dimensions)

        output_dimensions = [
            # Note: this objective name contains an embedded newline and a space.
            ContinuousDimension(name="z\n special", min=-50, max=-49),
            ContinuousDimension(name="z1", min=-1, max=1),
        ]
        objective_space = SimpleHypergrid(name="z", dimensions=output_dimensions)

        objectives = [
            Objective(name="z\n special", minimize=True),
            Objective(name="z1", minimize=False),
        ]
        optimization_problem = OptimizationProblem(
            parameter_space=parameter_space,
            objective_space=objective_space,
            objectives=objectives,
        )

        encoded = OptimizerServiceEncoder.encode_optimization_problem(optimization_problem)
        decoded_problem = OptimizerServiceDecoder.decode_optimization_problem(encoded)

        print(f"Context space is: {decoded_problem.context_space}")
        assert decoded_problem.context_space is None

        num_samples = 1000

        # Parameter space: sample in both directions.
        for _ in range(num_samples):
            point_from_decoded = decoded_problem.parameter_space.random()
            assert point_from_decoded in parameter_space
            point_from_original = parameter_space.random()
            assert point_from_original in decoded_problem.parameter_space

        # Objective (output) space: sample in both directions.
        for _ in range(num_samples):
            point_from_decoded = decoded_problem.objective_space.random()
            assert point_from_decoded in objective_space
            point_from_original = objective_space.random()
            assert point_from_original in decoded_problem.objective_space

        # Feature space: sample in both directions.
        for _ in range(num_samples):
            point_from_decoded = decoded_problem.feature_space.random()
            assert point_from_decoded in optimization_problem.feature_space
            point_from_original = optimization_problem.feature_space.random()
            assert point_from_original in decoded_problem.feature_space
    def test_randomly_generating_team_member(self):
        """Draw one random point from a single-dimension categorical grid and check it is in the grid."""
        self.logger.info("Starting first check in test.")

        member_dimension = CategoricalDimension(
            name="member",
            values=["Ed", "Greg", "Sergiy", "Yaser", "Adam", "Zack"],
        )
        team_grid = SimpleHypergrid(name="mlos_team", dimensions=[member_dimension])

        drawn_member = team_grid.random()
        assert drawn_member in team_grid
class TestHierarchicalHypergrid3(unittest.TestCase):
    """ Tests the join on external dimension in hypergrids.

    In particular:
    * Hypergrid.join(subgrid, on_external_dimension=SomeDimension(...)) should:
        * Check if the dimension.name contains a subgrid name:
            * if yes - drop the prefix and call dimension_subgrid.join(subgrid, on_external_dimension)
            * otherwise we are joining here so:
                * if not dimension.intersects(self[dimension.name]): return self
                * self.joined_subgrids_by_pivot_dimension[dimension.name] = JoinedHypergrid(dimension, subgrid)

    * Randomly generating points from the supergrid should generate points from the newly joined subgrid
    * Point containment should work
    * Hypergrid containment should work (eventually)

    """

    def setUp(self):
        """Build the hierarchical cache parameter space used by the tests.

        Several flat hypergrids are created first and then joined:
        the hash-function and binary-search-tree subgrids are joined onto the
        associative cache root grid, and the LRU / associative cache grids are
        joined onto the top-level cache grid. The linked-list subgrid is joined
        last, through the top-level grid, using a dimension name prefixed with
        the subgrid name ('associative_cache_config.bucket_implementation').
        """
        self.cache_param_space = SimpleHypergrid(
            name='cache_param_space',
            dimensions=[
                CategoricalDimension(name='cache_implementation_name', values=['lru_cache', 'associative_cache'])
            ]
        )

        self.lru_cache_param_space = SimpleHypergrid(
            name='lru_cache_config',
            dimensions=[
                DiscreteDimension(name='size', min=1, max=2**20),
                OrdinalDimension(name='color', ordered_values=['green', 'orange', 'red'])
            ]
        )

        self.associative_cache_implementation_root_param_space = SimpleHypergrid(
            name='associative_cache_config',
            dimensions=[
                CategoricalDimension(name='hash_function_name', values=['mod_prime_hash_function', 'lowest_bits']),
                CategoricalDimension(name='bucket_implementation', values=['single_value', 'binary_search_tree', 'linked_list'])
            ]
        )

        self.mod_prime_hash_function_param_space = SimpleHypergrid(
            name='mod_prime_hash_function',
            dimensions=[
                OrdinalDimension(name='prime', ordered_values=[1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59])
            ]
        )

        self.lowest_bits_param_space = SimpleHypergrid(
            name='lowest_bits',
            dimensions=[
                DiscreteDimension(name='num_bits', min=1, max=64)
            ]
        )

        self.binary_search_tree_param_space = SimpleHypergrid(
            name='binary_search_tree',
            dimensions=[
                DiscreteDimension(name='max_depth', min=1, max=2**10)
            ]
        )

        self.linked_list_param_space = SimpleHypergrid(
            name='linked_list',
            dimensions=[
                DiscreteDimension(name='max_length', min=1, max=2**10)
            ]
        )

        self.associative_cache_implementation_param_space = self.associative_cache_implementation_root_param_space.join(
            subgrid=self.mod_prime_hash_function_param_space,
            on_external_dimension=CategoricalDimension(name='hash_function_name', values=['mod_prime_hash_function'])
        ).join(
            subgrid=self.lowest_bits_param_space,
            # The values must be a list: a bare string here would not describe
            # the single category 'lowest_bits'.
            on_external_dimension=CategoricalDimension(name='hash_function_name', values=['lowest_bits'])
        ).join(
            subgrid=self.binary_search_tree_param_space,
            on_external_dimension=CategoricalDimension(name='bucket_implementation', values=['binary_search_tree'])
        )

        self.cache_param_space = self.cache_param_space.join(
            subgrid=self.lru_cache_param_space,
            on_external_dimension=CategoricalDimension(name='cache_implementation_name', values=['lru_cache'])
        ).join(
            subgrid=self.associative_cache_implementation_param_space,
            on_external_dimension=CategoricalDimension(name='cache_implementation_name', values=['associative_cache'])
        ).join(
            subgrid=self.linked_list_param_space,
            # The join dimension is addressed through the name of the subgrid that owns it.
            on_external_dimension=CategoricalDimension(name='associative_cache_config.bucket_implementation', values=['linked_list'])
        )

    def test_external_dimension_join(self):
        """Sample random configs from the joined space and check each one is contained in it.

        Every parameter of each sampled config is printed for inspection, and the
        containment result is both printed and asserted.
        """
        separator = "################################################"
        for _ in range(10):
            print(separator)
            random_config = self.cache_param_space.random()
            for param_name, value in random_config:
                print(param_name, value)
            is_contained = random_config in self.cache_param_space
            print(is_contained)
            self.assertTrue(
                is_contained,
                "A config drawn from cache_param_space must be contained in cache_param_space."
            )
        print(separator)
    def test_optimization_problem(self):
        """Round-trip an optimization problem with a context space through the monitoring encoder/decoder.

        After encoding with OptimizerMonitoringServiceEncoder and decoding with
        OptimizerMonitoringServiceDecoder, the parameter, objective, context and
        feature spaces are compared with the originals by random sampling, and
        the decoded objectives are checked for name and direction.
        """
        parameter_space = SimpleHypergrid(
            name="test",
            dimensions=[
                ContinuousDimension(name="x", min=0, max=1),
                CategoricalDimension(name="y", values=[1, 2, 3]),
            ],
        )
        objective_space = SimpleHypergrid(
            name="z",
            dimensions=[
                ContinuousDimension(name="z", min=0, max=1),
                ContinuousDimension(name="z1", min=-1, max=1),
            ],
        )
        context_space = SimpleHypergrid(
            name="context_space",
            dimensions=[
                ContinuousDimension(name="x_c", min=0, max=1),
                CategoricalDimension(name="y_c", values=[1, 2, 3, 4, 6]),
            ],
        )

        objectives = [
            Objective(name="z", minimize=True),
            Objective(name="z1", minimize=False),
        ]
        optimization_problem = OptimizationProblem(
            parameter_space=parameter_space,
            objective_space=objective_space,
            objectives=objectives,
            context_space=context_space,
        )

        encoded = OptimizerMonitoringServiceEncoder.encode_optimization_problem(optimization_problem)
        decoded_problem = OptimizerMonitoringServiceDecoder.decode_optimization_problem(encoded)

        # Two spaces are treated as equal when random points drawn from each one
        # fall inside the other (A == B iff A is a subset of B and B is a subset of A).
        # The loops are kept separate, one per space, for readability.
        num_samples = 1000

        # Parameter space
        for _ in range(num_samples):
            point_from_decoded = decoded_problem.parameter_space.random()
            assert point_from_decoded in parameter_space
            point_from_original = parameter_space.random()
            assert point_from_original in decoded_problem.parameter_space

        # Objective (output) space
        for _ in range(num_samples):
            point_from_decoded = decoded_problem.objective_space.random()
            assert point_from_decoded in objective_space
            point_from_original = objective_space.random()
            assert point_from_original in decoded_problem.objective_space

        # Context space
        for _ in range(num_samples):
            point_from_decoded = decoded_problem.context_space.random()
            assert point_from_decoded in context_space
            point_from_original = context_space.random()
            assert point_from_original in decoded_problem.context_space

        # Feature space
        for _ in range(num_samples):
            point_from_decoded = decoded_problem.feature_space.random()
            assert point_from_decoded in optimization_problem.feature_space
            point_from_original = optimization_problem.feature_space.random()
            assert point_from_original in decoded_problem.feature_space

        # The objectives must survive the round trip in order, with their directions.
        print(decoded_problem.objectives)
        assert len(decoded_problem.objectives) == 2
        assert decoded_problem.objectives[0].name == "z"
        assert decoded_problem.objectives[1].name == "z1"
        assert decoded_problem.objectives[0].minimize
        assert not decoded_problem.objectives[1].minimize
class TestHierarchicalSpaces(unittest.TestCase):
    """Tests point containment and random sampling on a three-level hierarchical hypergrid."""

    def setUp(self):
        """Build the hierarchical communication-channel space.

        The emergency buffer color grid is joined onto the emergency buffer
        settings on ``use_colors == True``; the result is joined onto the
        communication channel grid on ``use_emergency_buffer == True``.
        """
        self.emergency_buffer_settings = SimpleHypergrid(
            name='emergency_buffer_config',
            dimensions=[
                DiscreteDimension(name='log2_emergency_buffer_size',
                                  min=0,
                                  max=16),
                CategoricalDimension(name='use_colors', values=[True, False])
            ])

        self.emergency_buffer_color = SimpleHypergrid(
            name='emergency_buffer_color',
            dimensions=[
                CategoricalDimension(name='color',
                                     values=['Maroon', 'Crimson', 'Tanager'])
            ])

        self.emergency_buffer_settings_with_color = self.emergency_buffer_settings.join(
            subgrid=self.emergency_buffer_color,
            on_external_dimension=CategoricalDimension(name='use_colors',
                                                       values=[True]))

        self.hierarchical_settings = SimpleHypergrid(
            name='communication_channel_config',
            dimensions=[
                DiscreteDimension(name='num_readers', min=1, max=64),
                DiscreteDimension(name='log2_buffer_size', min=10, max=24),
                CategoricalDimension(name='use_emergency_buffer',
                                     values=[True, False])
            ]).join(subgrid=self.emergency_buffer_settings_with_color,
                    on_external_dimension=CategoricalDimension(
                        name='use_emergency_buffer', values=[True]))

    def _sample_with_seed(self, seed, num_points=100):
        """Install a fresh ``random.Random`` seeded with ``seed`` on the grid and draw ``num_points`` points."""
        random_state = random.Random()
        random_state.seed(seed)
        self.hierarchical_settings.random_state = random_state
        return [self.hierarchical_settings.random() for _ in range(num_points)]

    def test_composite_spaces(self):
        """Check which hand-built points are (and are not) contained in the hierarchical grids."""
        valid_config_no_emergency_buffer = Point(num_readers=1,
                                                 log2_buffer_size=10,
                                                 use_emergency_buffer=False)
        self.assertIn(valid_config_no_emergency_buffer,
                      self.hierarchical_settings)

        valid_emergency_buffer_config = Point(log2_emergency_buffer_size=2,
                                              use_colors=False)
        self.assertIn(valid_emergency_buffer_config,
                      self.emergency_buffer_settings)

        valid_config_with_emergency_buffer = Point(
            num_readers=1,
            log2_buffer_size=10,
            use_emergency_buffer=True,
            emergency_buffer_config=valid_emergency_buffer_config)
        self.assertIn(valid_config_with_emergency_buffer,
                      self.hierarchical_settings)

        # A subgrid point that also carries the pivot dimension of its parent.
        valid_emergency_buffer_color_config = Point(color='Crimson')
        valid_emergency_buffer_color_config_with_pivot_dimension = valid_emergency_buffer_color_config.copy()
        valid_emergency_buffer_color_config_with_pivot_dimension['use_colors'] = True
        self.assertIn(valid_emergency_buffer_color_config_with_pivot_dimension,
                      self.emergency_buffer_color)

        valid_colorful_emergency_buffer_config = Point(
            log2_emergency_buffer_size=2,
            use_colors=True,
            emergency_buffer_color=valid_emergency_buffer_color_config)
        valid_colorful_emergency_buffer_config_with_pivot_dimension = valid_colorful_emergency_buffer_config.copy()
        valid_colorful_emergency_buffer_config_with_pivot_dimension['use_emergency_buffer'] = True
        self.assertIn(valid_colorful_emergency_buffer_config_with_pivot_dimension,
                      self.emergency_buffer_settings_with_color)

        valid_config_with_emergency_buffer_colors = Point(
            num_readers=1,
            log2_buffer_size=10,
            use_emergency_buffer=True,
            emergency_buffer_config=valid_colorful_emergency_buffer_config)
        self.assertIn(valid_config_with_emergency_buffer_colors,
                      self.hierarchical_settings)

        # The emergency buffer is switched off, so the extra emergency buffer
        # coordinate is expected to be tolerated.
        valid_config_with_emergency_buffer_and_redundant_coordinates = Point(
            num_readers=1,
            log2_buffer_size=10,
            use_emergency_buffer=False,
            log2_emergency_buffer_size=2)
        self.assertIn(valid_config_with_emergency_buffer_and_redundant_coordinates,
                      self.hierarchical_settings)

        # The emergency buffer is switched on but no 'emergency_buffer_config' is supplied.
        another_invalid_config_with_emergency_buffer = Point(
            num_readers=1, log2_buffer_size=10, use_emergency_buffer=True)
        self.assertNotIn(another_invalid_config_with_emergency_buffer,
                         self.hierarchical_settings)

        yet_another_invalid_config_with_emergency_buffer = Point(
            num_readers=1,
            log2_buffer_size=10,
            use_emergency_buffer=True,
            log2_emergency_buffer_size=40)
        self.assertNotIn(yet_another_invalid_config_with_emergency_buffer,
                         self.hierarchical_settings)

    def test_generating_random_configs(self):
        """Check that seeded random sampling reaches every level of the hierarchy.

        Over 100 samples, each of which must be contained in the grid, at least
        one must use the emergency buffer, at least one must use colors, and at
        least one must pick the color 'Crimson'.
        """
        used_emergency_buffer = False
        used_color = False
        used_crimson = False

        # Let's seed it to make sure we get consistent test results
        random_state = random.Random()
        random_state.seed(1)
        self.hierarchical_settings.random_state = random_state

        for _ in range(100):
            random_config = self.hierarchical_settings.random()
            self.assertIn(random_config, self.hierarchical_settings)

            if not random_config['use_emergency_buffer']:
                continue
            used_emergency_buffer = True

            emergency_buffer_config = random_config['emergency_buffer_config']
            if not emergency_buffer_config['use_colors']:
                continue
            used_color = True

            if emergency_buffer_config['emergency_buffer_color']['color'] == 'Crimson':
                used_crimson = True

        self.assertTrue(used_emergency_buffer)
        self.assertTrue(used_color)
        self.assertTrue(used_crimson)

    def test_reseeding_random_state(self):
        """Check that the same seed reproduces the same points and different seeds change them."""
        previous_iteration_first_pass_points = None

        for seed in range(10):
            # Seed the grid, sample, then reseed with the same value and sample again.
            first_pass_points = self._sample_with_seed(seed)
            second_pass_points = self._sample_with_seed(seed)

            for first_pass_point, second_pass_point in zip(first_pass_points, second_pass_points):
                self.assertEqual(first_pass_point, second_pass_point)

            if previous_iteration_first_pass_points is not None:
                # Let's make sure we keep changing the points
                self.assertTrue(
                    any(previous != current for previous, current in zip(
                        previous_iteration_first_pass_points,
                        first_pass_points)))
            previous_iteration_first_pass_points = first_pass_points
class TestHierarchicalHypergrid2(unittest.TestCase):
    """ Tests the improved implementation of the Hypergrids.

    In particular:
    * SimpleHypergrid.join() should attach to the root hypergrid if possible
    * SimpleHypergrids that are hierarchical implement a hierarchical namespace, where a coordinate within
        each subgrid is prefixed with the name of that subgrid, e.g. the 'num_bits' dimension is reached as
        cache_param_space["associative_cache_config"]["lowest_bits"]["num_bits"].

    """

    def setUp(self):
        """Build the hierarchical cache parameter space used by all tests in this class.

        Leaf grids (LRU cache, hash functions, bucket implementations) are
        created first, joined onto the associative cache grid on the matching
        categorical values, and the two cache grids are then joined onto the
        top-level grid on 'cache_implementation_name'.
        """
        self.lru_cache_param_space = SimpleHypergrid(
            name='lru_cache_config',
            dimensions=[
                DiscreteDimension(name='size', min=1, max=2**20),
                OrdinalDimension(name='color', ordered_values=['green', 'orange', 'red'])
            ]
        )

        self.mod_prime_hash_function_param_space = SimpleHypergrid(
            name='mod_prime_hash_function',
            dimensions=[
                OrdinalDimension(
                    name='prime',
                    ordered_values=[1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59]
                )
            ]
        )

        self.lowest_bits_param_space = SimpleHypergrid(
            name='lowest_bits',
            dimensions=[DiscreteDimension(name='num_bits', min=1, max=64)]
        )

        self.binary_search_tree_param_space = SimpleHypergrid(
            name='binary_search_tree',
            dimensions=[DiscreteDimension(name='max_depth', min=1, max=2**10)]
        )

        self.linked_list_param_space = SimpleHypergrid(
            name='linked_list',
            dimensions=[DiscreteDimension(name='max_length', min=1, max=2**10)]
        )

        self.associative_cache_implementation_param_space = SimpleHypergrid(
            name='associative_cache_config',
            dimensions=[
                CategoricalDimension(name='hash_function_name', values=['mod_prime_hash_function', 'lowest_bits']),
                CategoricalDimension(name='bucket_implementation', values=['single_value', 'binary_search_tree', 'linked_list'])
            ]
        ).join(
            subgrid=self.mod_prime_hash_function_param_space,
            on_external_dimension=CategoricalDimension(name='hash_function_name', values=['mod_prime_hash_function'])
        ).join(
            subgrid=self.lowest_bits_param_space,
            on_external_dimension=CategoricalDimension(name='hash_function_name', values=['lowest_bits'])
        ).join(
            subgrid=self.binary_search_tree_param_space,
            on_external_dimension=CategoricalDimension(name='bucket_implementation', values=['binary_search_tree'])
        ).join(
            subgrid=self.linked_list_param_space,
            on_external_dimension=CategoricalDimension(name='bucket_implementation', values=['linked_list'])
        )

        self.cache_param_space = SimpleHypergrid(
            name='cache_param_space',
            dimensions=[
                CategoricalDimension(name='cache_implementation_name', values=['lru_cache', 'associative_cache'])
            ]
        ).join(
            subgrid=self.lru_cache_param_space,
            on_external_dimension=CategoricalDimension(name='cache_implementation_name', values=['lru_cache'])
        ).join(
            subgrid=self.associative_cache_implementation_param_space,
            on_external_dimension=CategoricalDimension(name='cache_implementation_name', values=['associative_cache'])
        )

    def test_efficient_join(self):
        """ Tests if the join efficiently flattens the tree of hypergrids.

        Each subgrid must be registered under the pivot dimension it was joined on,
        in the grid that owns that dimension.

        :return:
        """
        self.assertEqual(self.cache_param_space.name, 'cache_param_space')

        subgrids_joined_on_cache_implementation_name_dimension = {
            joined_subgrid.subgrid
            for joined_subgrid
            in self.cache_param_space.joined_subgrids_by_pivot_dimension['cache_implementation_name']
        }
        self.assertIn(self.lru_cache_param_space,
                      subgrids_joined_on_cache_implementation_name_dimension)
        self.assertIn(self.associative_cache_implementation_param_space,
                      subgrids_joined_on_cache_implementation_name_dimension)

        associative_joins = self.associative_cache_implementation_param_space.joined_subgrids_by_pivot_dimension

        subgrids_joined_on_hash_function_name_dimension = {
            guest_subgrid.subgrid for guest_subgrid in associative_joins['hash_function_name']
        }
        self.assertIn(self.mod_prime_hash_function_param_space,
                      subgrids_joined_on_hash_function_name_dimension)
        self.assertIn(self.lowest_bits_param_space,
                      subgrids_joined_on_hash_function_name_dimension)

        subgrids_joined_on_bucket_implementation_dimension = {
            guest_subgrid.subgrid for guest_subgrid in associative_joins['bucket_implementation']
        }
        self.assertIn(self.binary_search_tree_param_space,
                      subgrids_joined_on_bucket_implementation_dimension)
        self.assertIn(self.linked_list_param_space,
                      subgrids_joined_on_bucket_implementation_dimension)

    def test_name_flattening(self):
        """Check that flattened copies of random configs work with a flat hypergrid.

        For each random config, a flat grid is built from copies of the config's
        dimensions with flattened names; the flattened config must be contained in
        it. A second, unrelated flattened config is then tested for containment
        only to make sure the check itself does not raise.
        """
        num_tests = 1000

        for _ in range(num_tests):
            random_config = self.cache_param_space.random()

            flat_dimensions = []
            for dimension_name, _value in random_config:
                original_dimension = self.cache_param_space[dimension_name]
                flat_dimension = original_dimension.copy()
                flat_dimension.name = Dimension.flatten_dimension_name(dimension_name)
                flat_dimensions.append(flat_dimension)

            # Let's create a flat hypergrid that contains that random_config
            flat_cache_param_space = SimpleHypergrid(
                name=f"Flat{self.cache_param_space.name}",
                dimensions=flat_dimensions)

            flat_random_config = random_config.flat_copy()
            self.assertIn(flat_random_config, flat_cache_param_space)

            # let's try another random config
            another_random_config = self.cache_param_space.random()
            flattened_config = another_random_config.flat_copy()
            try:
                # The result may be True or False; only an exception is a failure.
                _ = flattened_config in flat_cache_param_space
            except Exception as error:  # report the real cause instead of a bare 'False is not true'
                self.fail(f"Containment check on a flattened config raised {error!r}")

    def test_that_getitem_returns_subgrid(self):
        """ Tests if we can use the __getitem__ operator to retrieve a subgrid.

        :return:
        """
        lru_cache_param_space = self.cache_param_space['lru_cache_config']
        for _ in range(1000):
            self.assertIn(lru_cache_param_space.random(), self.lru_cache_param_space)
            self.assertIn(self.lru_cache_param_space.random(), lru_cache_param_space)

    def test_that_getitem_returns_dimensions(self):
        """ Tests if we can use the __getitem__ operator to retrieve a dimension.

        :return:
        """
        cache_implementation_dimension = self.cache_param_space["cache_implementation_name"]
        self.assertEqual(
            cache_implementation_dimension,
            CategoricalDimension(name='cache_implementation_name', values=['lru_cache', 'associative_cache'])
        )
        num_bits_dimension = self.cache_param_space["associative_cache_config"]["lowest_bits"]["num_bits"]
        self.assertEqual(num_bits_dimension, self.lowest_bits_param_space["num_bits"])

    def test_getitem_throws(self):
        """Tests that looking up an unknown name with __getitem__ raises KeyError."""
        with self.assertRaises(KeyError):
            self.cache_param_space["non_existent_dimension"]

    def test_that_collision_throws(self):
        """ Test that if we try to join on a subgrid that has the same name as an existing dimension, we throw.

        This is because the __getitem__ can return either a dimension or a subgrid, so their names cannot collide.

        :return:
        """
        with self.assertRaises(ValueError):
            SimpleHypergrid(
                name="collisions",
                dimensions=[
                    # Same name as the subgrid joined below.
                    CategoricalDimension(name="associative_cache_config", values=[True, False]),
                    CategoricalDimension(name='cache_implementation_name', values=['lru_cache', 'associative_cache'])
                ]
            ).join(
                subgrid=self.associative_cache_implementation_param_space,
                on_external_dimension=CategoricalDimension(name='cache_implementation_name', values=['associative_cache'])
            )

    def test_pickling(self):
        """Tests that random points compare equal to themselves after a pickle round trip."""
        for _ in range(100):
            random_point = self.cache_param_space.random()
            pickled = pickle.dumps(random_point)
            unpickled = pickle.loads(pickled)
            self.assertEqual(unpickled, random_point)
示例#7
0
    def test_optimization_with_context(self):
        # Gaussian blob in x with position dependent on context variable y.
        def f(parameters, context):
            if isinstance(parameters, pd.DataFrame):
                index = parameters.index
            else:
                index = [0]
            return pd.DataFrame(
                {
                    'function_value':
                    -np.exp(-50 * (parameters.x - 0.5 * context.y - 0.5)**2)
                },
                index=index)

        input_space = SimpleHypergrid(
            name="input",
            dimensions=[ContinuousDimension(name="x", min=0, max=1)])
        output_space = SimpleHypergrid(name="objective",
                                       dimensions=[
                                           ContinuousDimension(
                                               name="function_value",
                                               min=-10,
                                               max=10)
                                       ])
        context_space = SimpleHypergrid(
            name="context",
            dimensions=[ContinuousDimension(name="y", min=-1, max=1)])

        optimization_problem = OptimizationProblem(
            parameter_space=input_space,
            objective_space=output_space,
            # we want to minimize the function
            objectives=[Objective(name="function_value", minimize=True)],
            context_space=context_space)

        # create some data points to eval
        n_samples = 5000
        parameter_df = input_space.random_dataframe(n_samples)
        context_df = context_space.random_dataframe(n_samples)

        target_df = f(parameter_df, context_df)

        local_optimizer = self.bayesian_optimizer_factory.create_local_optimizer(
            optimization_problem=optimization_problem, )

        with pytest.raises(ValueError, match="Context required"):
            local_optimizer.register(
                parameter_values_pandas_frame=parameter_df,
                target_values_pandas_frame=target_df)

        with pytest.raises(
                ValueError,
                match="Incompatible shape of parameters and context"):
            local_optimizer.register(
                parameter_values_pandas_frame=parameter_df,
                target_values_pandas_frame=target_df,
                context_values_pandas_frame=context_df.iloc[:-1])

        local_optimizer.register(parameter_values_pandas_frame=parameter_df,
                                 target_values_pandas_frame=target_df,
                                 context_values_pandas_frame=context_df)

        with pytest.raises(ValueError, match="Context required"):
            local_optimizer.suggest()

        with pytest.raises(ValueError, match="Context required"):
            local_optimizer.predict(parameter_values_pandas_frame=parameter_df)

        suggestion = local_optimizer.suggest(context=context_space.random())
        assert isinstance(suggestion, Point)
        assert suggestion in input_space

        with pytest.raises(
                ValueError,
                match="Incompatible shape of parameters and context"):
            # unaligned parameters and context
            local_optimizer.predict(
                parameter_values_pandas_frame=parameter_df,
                context_values_pandas_frame=context_df.iloc[:-1])

        predictions = local_optimizer.predict(
            parameter_values_pandas_frame=parameter_df,
            context_values_pandas_frame=context_df)
        predictions_df = predictions.get_dataframe()
        assert len(predictions_df) == len(parameter_df)

        remote_optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
            optimization_problem=optimization_problem, )

        with pytest.raises(ValueError,
                           match="not supported if context is provided"):
            local_optimizer.optimum(
                optimum_definition=OptimumDefinition.BEST_OBSERVATION,
                context=Point(y=0).to_dataframe())

        with pytest.raises(ValueError,
                           match="not supported if context is provided"):
            local_optimizer.optimum(
                optimum_definition=OptimumDefinition.BEST_OBSERVATION)

        with pytest.raises(ValueError,
                           match="requires context to be not None"):
            local_optimizer.optimum(optimum_definition=OptimumDefinition.
                                    BEST_SPECULATIVE_WITHIN_CONTEXT)

        # can't register, predict, suggest with context on remote optimizer
        with pytest.raises(NotImplementedError,
                           match="Context not currently supported"):
            remote_optimizer.register(
                parameter_values_pandas_frame=parameter_df,
                target_values_pandas_frame=target_df,
                context_values_pandas_frame=context_df)

        with pytest.raises(NotImplementedError,
                           match="Context not currently supported"):
            remote_optimizer.predict(
                parameter_values_pandas_frame=parameter_df,
                context_values_pandas_frame=context_df)

        with pytest.raises(NotImplementedError,
                           match="Context not currently supported"):
            remote_optimizer.suggest(context=context_df)

        # context is missing but required by problem, should give error
        with pytest.raises(grpc.RpcError):
            remote_optimizer.register(
                parameter_values_pandas_frame=parameter_df,
                target_values_pandas_frame=target_df)

        # run some iterations on local optimizer to see we do something sensible
        for _ in range(100):
            # pick context at random
            context = context_space.random()
            suggested_config = local_optimizer.suggest(context=context)
            target_values = f(suggested_config, context)
            local_optimizer.register(
                parameter_values_pandas_frame=suggested_config.to_dataframe(),
                target_values_pandas_frame=target_values,
                context_values_pandas_frame=context.to_dataframe())

        optimum_y_1 = local_optimizer.optimum(
            optimum_definition=OptimumDefinition.
            BEST_SPECULATIVE_WITHIN_CONTEXT,
            context=Point(y=-1).to_dataframe())
        optimum_y1 = local_optimizer.optimum(
            optimum_definition=OptimumDefinition.
            BEST_SPECULATIVE_WITHIN_CONTEXT,
            context=Point(y=1).to_dataframe())
        assert optimum_y1.x > .6
        assert optimum_y_1.x < .4
示例#8
0
    def test_registering_multiple_objectives(self):
        """Registers observations with extra, missing and invalid output columns on a local optimizer.

        The problem has two output dimensions (y_1, y_2) but a single objective (y_1). The test checks that:
        * predictions are produced for every requested row,
        * output columns outside the objective space are not remembered,
        * an observation missing y_2 is accepted and stored with a null y_2,
        * observations without any valid output column raise ValueError.
        """
        parameter_dimensions = [
            ContinuousDimension(name=dimension_name, min=0, max=10)
            for dimension_name in ("x_1", "x_2")
        ]
        input_space = SimpleHypergrid(name='input', dimensions=parameter_dimensions)

        objective_dimensions = [
            ContinuousDimension(name=dimension_name, min=0, max=10)
            for dimension_name in ("y_1", "y_2")
        ]
        output_space = SimpleHypergrid(name='output', dimensions=objective_dimensions)

        optimization_problem = OptimizationProblem(
            parameter_space=input_space,
            objective_space=output_space,
            objectives=[Objective(name='y_1', minimize=True)])

        optimizer = self.bayesian_optimizer_factory.create_local_optimizer(
            optimization_problem=optimization_problem)

        # Warm up the optimizer: the observed outputs simply mirror the suggested inputs.
        #
        for _ in range(100):
            suggested_params = optimizer.suggest()
            observed_output = Point(y_1=suggested_params.x_1, y_2=suggested_params.x_2)
            optimizer.register(suggested_params.to_dataframe(), observed_output.to_dataframe())

        num_predictions = 100
        random_params_df = optimization_problem.parameter_space.random_dataframe(num_predictions)
        prediction = optimizer.predict(parameter_values_pandas_frame=random_params_df)
        assert len(prediction.get_dataframe().index) == num_predictions

        # From here on we register malformed observations against a single random input.
        #
        random_params = input_space.random()
        input_df = random_params.to_dataframe()

        # An extra output column is tolerated, but only the valid columns should be kept.
        #
        output_with_extra_dimension_df = Point(
            y_1=random_params.x_1,
            y_2=random_params.x_2,
            invalid_dimension=42).to_dataframe()
        optimizer.register(input_df, output_with_extra_dimension_df)

        _, all_outputs_df, _ = optimizer.get_all_observations()
        valid_output_columns = {'y_1', 'y_2'}
        unexpected_columns = [
            column for column in all_outputs_df.columns
            if column not in valid_output_columns
        ]
        assert not unexpected_columns

        # A missing output column is tolerated as long as at least one valid column is present...
        #
        output_with_missing_dimension_df = Point(y_1=random_params.x_1).to_dataframe()
        optimizer.register(input_df, output_with_missing_dimension_df)
        _, all_outputs_df, _ = optimizer.get_all_observations()

        # ... and the missing column must show up as a null in the most recent observation.
        #
        most_recent_output = all_outputs_df.iloc[[-1]]
        assert most_recent_output['y_2'].isnull().values.all()

        # Observations with no valid output column at all (empty, or only unknown columns) must be rejected.
        #
        outputs_without_valid_columns = (
            Point(),
            Point(invalid_col1=0, invalid_col2=2),
        )
        for invalid_output in outputs_without_valid_columns:
            # Build the frame outside of pytest.raises so that only register() is allowed to raise.
            invalid_output_df = invalid_output.to_dataframe()
            with pytest.raises(ValueError):
                optimizer.register(input_df, invalid_output_df)
示例#9
0
class TestHierarchicalHypergrid2(unittest.TestCase):
    """ Tests the improved implementation of the Hypergrids.

    In particular:
    * SimpleHypergrid.join() should attach to the root hypergrid if possible
    * SimpleHypergrids that are hierarchical implement a hierarchical namespace, where a coordinate within
        each subgrid is prefixed with the name of that subgrid.

    """

    def setUp(self):
        """Builds the leaf parameter spaces and joins them into the hierarchical cache parameter space."""

        def single_value_pivot(dimension_name, value):
            # One-valued categorical dimension, passed as on_external_dimension to join().
            return CategoricalDimension(name=dimension_name, values=[value])

        # Leaf hypergrids.
        #
        self.lru_cache_param_space = SimpleHypergrid(
            name='lru_cache_config',
            dimensions=[
                DiscreteDimension(name='size', min=1, max=2**20),
                OrdinalDimension(name='color', ordered_values=['green', 'orange', 'red']),
            ]
        )

        prime_values = [1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59]
        self.mod_prime_hash_function_param_space = SimpleHypergrid(
            name='mod_prime_hash_function',
            dimensions=[OrdinalDimension(name='prime', ordered_values=prime_values)]
        )

        self.lowest_bits_param_space = SimpleHypergrid(
            name='lowest_bits',
            dimensions=[DiscreteDimension(name='num_bits', min=1, max=64)]
        )

        self.binary_search_tree_param_space = SimpleHypergrid(
            name='binary_search_tree',
            dimensions=[DiscreteDimension(name='max_depth', min=1, max=2**10)]
        )

        self.linked_list_param_space = SimpleHypergrid(
            name='linked_list',
            dimensions=[DiscreteDimension(name='max_length', min=1, max=2**10)]
        )

        # Associative cache: hash function and bucket implementation subgrids are joined one after another.
        #
        associative_cache_space = SimpleHypergrid(
            name='associative_cache_config',
            dimensions=[
                CategoricalDimension(name='hash_function_name', values=['mod_prime_hash_function', 'lowest_bits']),
                CategoricalDimension(name='bucket_implementation', values=['single_value', 'binary_search_tree', 'linked_list']),
            ]
        )
        associative_cache_joins = (
            (self.mod_prime_hash_function_param_space, 'hash_function_name', 'mod_prime_hash_function'),
            (self.lowest_bits_param_space, 'hash_function_name', 'lowest_bits'),
            (self.binary_search_tree_param_space, 'bucket_implementation', 'binary_search_tree'),
            (self.linked_list_param_space, 'bucket_implementation', 'linked_list'),
        )
        for subgrid, pivot_name, pivot_value in associative_cache_joins:
            # Keep whatever join() returns, exactly as the chained calls would.
            associative_cache_space = associative_cache_space.join(
                subgrid=subgrid,
                on_external_dimension=single_value_pivot(pivot_name, pivot_value)
            )
        self.associative_cache_implementation_param_space = associative_cache_space

        # Top-level space: pick a cache implementation, then descend into its subgrid.
        #
        cache_space = SimpleHypergrid(
            name='cache_param_space',
            dimensions=[
                CategoricalDimension(name='cache_implementation_name', values=['lru_cache', 'associative_cache'])
            ]
        )
        cache_joins = (
            (self.lru_cache_param_space, 'lru_cache'),
            (self.associative_cache_implementation_param_space, 'associative_cache'),
        )
        for subgrid, implementation_name in cache_joins:
            cache_space = cache_space.join(
                subgrid=subgrid,
                on_external_dimension=single_value_pivot('cache_implementation_name', implementation_name)
            )
        self.cache_param_space = cache_space

    def test_efficient_join(self):
        """ Checks that each joined subgrid is listed among its host's guest subgrids for the right pivot dimension.

        :return:
        """
        def subgrids_joined_on(host_hypergrid, pivot_dimension_name):
            # Collect the subgrids the host keeps for the given pivot dimension.
            guest_subgrids = host_hypergrid.guest_subgrids_by_pivot_dimension[pivot_dimension_name]
            return {guest_subgrid.subgrid for guest_subgrid in guest_subgrids}

        self.assertTrue(self.cache_param_space.name == 'cache_param_space')

        # (host hypergrid, pivot dimension name, subgrids expected to be joined on that dimension)
        expected_joins = (
            (
                self.cache_param_space,
                'cache_implementation_name',
                (self.lru_cache_param_space, self.associative_cache_implementation_param_space),
            ),
            (
                self.associative_cache_implementation_param_space,
                'hash_function_name',
                (self.mod_prime_hash_function_param_space, self.lowest_bits_param_space),
            ),
            (
                self.associative_cache_implementation_param_space,
                'bucket_implementation',
                (self.binary_search_tree_param_space, self.linked_list_param_space),
            ),
        )

        for host_hypergrid, pivot_dimension_name, expected_subgrids in expected_joins:
            joined_subgrids = subgrids_joined_on(host_hypergrid, pivot_dimension_name)
            for expected_subgrid in expected_subgrids:
                self.assertTrue(expected_subgrid in joined_subgrids)

    def test_name_flattening(self):
        """Checks that flattened configs work against a hypergrid built from flattened dimensions.

        For each of ``num_tests`` random configs drawn from ``self.cache_param_space``:
        * a flat SimpleHypergrid is built from copies of the dimensions named in that config, each renamed
          with Dimension.flatten_dimension_name();
        * the config's flat_copy() must be contained in that flat hypergrid;
        * testing a second, independently drawn flattened config for membership must not raise
          (whether it is contained or not is deliberately not asserted).
        """
        num_tests = 1000

        for _ in range(num_tests):
            random_config = self.cache_param_space.random()

            flat_dimensions = []
            for dimension_name, _value in random_config:
                original_dimension = self.cache_param_space[dimension_name]
                # Rename a copy so that the dimension owned by the hierarchical space is left untouched.
                flat_dimension = original_dimension.copy()
                flat_dimension.name = Dimension.flatten_dimension_name(dimension_name)
                flat_dimensions.append(flat_dimension)

            # Let's create a flat hypergrid that contains that random_config
            flat_cache_param_space = SimpleHypergrid(
                name=f"Flat{self.cache_param_space.name}",
                dimensions=flat_dimensions
            )

            flat_random_config = random_config.flat_copy()
            self.assertTrue(flat_random_config in flat_cache_param_space)

            # let's try another random config
            another_random_config = self.cache_param_space.random()
            flattened_config = another_random_config.flat_copy()
            try:
                # Only the absence of an exception matters here; the result of the check is discarded.
                _ = flattened_config in flat_cache_param_space
            except Exception as error:  # KeyboardInterrupt / SystemExit are intentionally allowed to propagate.
                self.fail(
                    f"Membership check of {flattened_config} in {flat_cache_param_space.name} raised "
                    f"{type(error).__name__}: {error}"
                )