def decode_simple_hypergrid(hypergrid: OptimizerService_pb2.SimpleHypergrid) -> SimpleHypergrid:
    """Decodes a protobuf SimpleHypergrid message into a SimpleHypergrid object."""
    assert isinstance(hypergrid, OptimizerService_pb2.SimpleHypergrid)
    decoded_hypergrid = SimpleHypergrid(
        name=hypergrid.Name,
        dimensions=[OptimizerServiceDecoder.decode_dimension(dimension) for dimension in hypergrid.Dimensions]
    )

    # Recursively decode each guest subgrid and re-attach it on its pivot dimension.
    for subgrid in hypergrid.GuestSubgrids:
        decoded_subgrid = OptimizerServiceDecoder.decode_subgrid(subgrid)
        decoded_hypergrid.join(
            subgrid=decoded_subgrid.subgrid,
            on_external_dimension=decoded_subgrid.join_dimension
        )

    return decoded_hypergrid
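
# A minimal sketch of the structure the decoder above reconstructs: a root
# SimpleHypergrid with each guest subgrid re-joined on its pivot dimension.
# Grid and dimension names here are illustrative, not part of the wire format.
root = SimpleHypergrid(
    name='root',
    dimensions=[CategoricalDimension(name='child_enabled', values=[True, False])]
)
root.join(
    subgrid=SimpleHypergrid(
        name='child',
        dimensions=[DiscreteDimension(name='x', min=0, max=10)]
    ),
    on_external_dimension=CategoricalDimension(name='child_enabled', values=[True])
)
assert root.random() in root  # sampling and containment recurse into the joined subgrid
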
class TestHierarchicalHypergrid3(unittest.TestCase):
    """ Tests the join on external dimension in hypergrids.

    In particular:
    * Hypergrid.join(subgrid, on_external_dimension=SomeDimension(...)) should:
        * Check if the dimension.name contains a subgrid name:
            * if yes - drop the prefix and call dimension_subgrid.join(subgrid, on_external_dimension)
            * otherwise we are joining here so:
                * if not dimension.intersects(self[dimension.name]): return self
                * self.joined_subgrids_by_pivot_dimension[dimension.name] = JoinedHypergrid(dimension, subgrid)

    * Randomly generating points from the supergrid should generate points from the newly joined subgrid
    * Point containment should work
    * Hypergrid containment should work (eventually)

    """

    def setUp(self):

        self.cache_param_space = SimpleHypergrid(
            name='cache_param_space',
            dimensions=[
                CategoricalDimension(name='cache_implementation_name', values=['lru_cache', 'associative_cache'])
            ]
        )

        self.lru_cache_param_space = SimpleHypergrid(
            name='lru_cache_config',
            dimensions=[
                DiscreteDimension(name='size', min=1, max=2**20),
                OrdinalDimension(name='color', ordered_values=['green', 'orange', 'red'])
            ]
        )


        self.associative_cache_implementation_root_param_space = SimpleHypergrid(
            name='associative_cache_config',
            dimensions=[
                CategoricalDimension(name='hash_function_name', values=['mod_prime_hash_function', 'lowest_bits']),
                CategoricalDimension(name='bucket_implementation', values=['single_value', 'binary_search_tree', 'linked_list'])
            ]
        )


        self.mod_prime_hash_function_param_space = SimpleHypergrid(
            name='mod_prime_hash_function',
            dimensions=[
                OrdinalDimension(name='prime', ordered_values=[1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59])
            ]
        )

        self.lowest_bits_param_space = SimpleHypergrid(
            name='lowest_bits',
            dimensions=[
                DiscreteDimension(name='num_bits', min=1, max=64)
            ]
        )


        self.binary_search_tree_param_space = SimpleHypergrid(
            name='binary_search_tree',
            dimensions=[
                DiscreteDimension(name='max_depth', min=1, max=2**10)
            ]
        )

        self.linked_list_param_space = SimpleHypergrid(
            name='linked_list',
            dimensions=[
                DiscreteDimension(name='max_length', min=1, max=2**10)
            ]
        )


        self.associative_cache_implementation_param_space = self.associative_cache_implementation_root_param_space.join(
            subgrid=self.mod_prime_hash_function_param_space,
            on_external_dimension=CategoricalDimension(name='hash_function_name', values=['mod_prime_hash_function'])
        ).join(
            subgrid=self.lowest_bits_param_space,
            on_external_dimension=CategoricalDimension(name='hash_function_name', values=['lowest_bits'])
        ).join(
            subgrid=self.binary_search_tree_param_space,
            on_external_dimension=CategoricalDimension(name='bucket_implementation', values=['binary_search_tree'])
        )

        self.cache_param_space = self.cache_param_space.join(
            subgrid=self.lru_cache_param_space,
            on_external_dimension=CategoricalDimension(name='cache_implementation_name', values=['lru_cache'])
        ).join(
            subgrid=self.associative_cache_implementation_param_space,
            on_external_dimension=CategoricalDimension(name='cache_implementation_name', values=['associative_cache'])
        ).join(
            subgrid=self.linked_list_param_space,
            on_external_dimension=CategoricalDimension(name='associative_cache_config.bucket_implementation', values=['linked_list'])
        )

    def test_external_dimension_join(self):
        for _ in range(10):
            random_config = self.cache_param_space.random()
            for param_name, value in random_config:
                print(param_name, value)
            # Every randomly generated point must belong to the joined space.
            self.assertTrue(random_config in self.cache_param_space)
class TestHierarchicalSpaces(unittest.TestCase):
    def setUp(self):
        self.emergency_buffer_settings = SimpleHypergrid(
            name='emergency_buffer_config',
            dimensions=[
                DiscreteDimension(name='log2_emergency_buffer_size',
                                  min=0,
                                  max=16),
                CategoricalDimension(name='use_colors', values=[True, False])
            ])

        self.emergency_buffer_color = SimpleHypergrid(
            name='emergency_buffer_color',
            dimensions=[
                CategoricalDimension(name='color',
                                     values=['Maroon', 'Crimson', 'Tanager'])
            ])

        self.emergency_buffer_settings_with_color = self.emergency_buffer_settings.join(
            subgrid=self.emergency_buffer_color,
            on_external_dimension=CategoricalDimension(name='use_colors',
                                                       values=[True]))

        self.hierarchical_settings = SimpleHypergrid(
            name='communication_channel_config',
            dimensions=[
                DiscreteDimension(name='num_readers', min=1, max=64),
                DiscreteDimension(name='log2_buffer_size', min=10, max=24),
                CategoricalDimension(name='use_emergency_buffer',
                                     values=[True, False])
            ]).join(subgrid=self.emergency_buffer_settings_with_color,
                    on_external_dimension=CategoricalDimension(
                        name='use_emergency_buffer', values=[True]))

    def test_composite_spaces(self):

        valid_config_no_emergency_buffer = Point(num_readers=1,
                                                 log2_buffer_size=10,
                                                 use_emergency_buffer=False)
        self.assertTrue(
            valid_config_no_emergency_buffer in self.hierarchical_settings)

        valid_emergency_buffer_config = Point(log2_emergency_buffer_size=2,
                                              use_colors=False)

        self.assertTrue(
            valid_emergency_buffer_config in self.emergency_buffer_settings)

        valid_config_with_emergency_buffer = Point(
            num_readers=1,
            log2_buffer_size=10,
            use_emergency_buffer=True,
            emergency_buffer_config=valid_emergency_buffer_config)
        self.assertTrue(
            valid_config_with_emergency_buffer in self.hierarchical_settings)

        valid_emergency_buffer_color_config = Point(color='Crimson')
        valid_emergency_buffer_color_config_with_pivot_dimension = valid_emergency_buffer_color_config.copy(
        )
        valid_emergency_buffer_color_config_with_pivot_dimension[
            'use_colors'] = True
        self.assertTrue(
            valid_emergency_buffer_color_config_with_pivot_dimension in
            self.emergency_buffer_color)

        valid_colorful_emergency_buffer_config = Point(
            log2_emergency_buffer_size=2,
            use_colors=True,
            emergency_buffer_color=valid_emergency_buffer_color_config)
        valid_colorful_emergency_buffer_config_with_pivot_dimension = valid_colorful_emergency_buffer_config.copy(
        )
        valid_colorful_emergency_buffer_config_with_pivot_dimension[
            'use_emergency_buffer'] = True
        self.assertTrue(
            valid_colorful_emergency_buffer_config_with_pivot_dimension in
            self.emergency_buffer_settings_with_color)

        valid_config_with_emergency_buffer_colors = Point(
            num_readers=1,
            log2_buffer_size=10,
            use_emergency_buffer=True,
            emergency_buffer_config=valid_colorful_emergency_buffer_config)

        valid_config_with_emergency_buffer_and_redundant_coordinates = Point(
            num_readers=1,
            log2_buffer_size=10,
            use_emergency_buffer=False,
            log2_emergency_buffer_size=2)
        self.assertTrue(
            valid_config_with_emergency_buffer_and_redundant_coordinates in
            self.hierarchical_settings)

        another_invalid_config_with_emergency_buffer = Point(
            num_readers=1, log2_buffer_size=10, use_emergency_buffer=True)

        yet_another_invalid_config_with_emergency_buffer = Point(
            num_readers=1,
            log2_buffer_size=10,
            use_emergency_buffer=True,
            log2_emergency_buffer_size=40)

        self.assertTrue(
            valid_config_no_emergency_buffer in self.hierarchical_settings)
        self.assertTrue(
            valid_config_with_emergency_buffer in self.hierarchical_settings)
        self.assertTrue(valid_config_with_emergency_buffer_colors in
                        self.hierarchical_settings)
        self.assertTrue(
            valid_config_with_emergency_buffer_and_redundant_coordinates in
            self.hierarchical_settings)
        self.assertTrue(another_invalid_config_with_emergency_buffer not in
                        self.hierarchical_settings)
        self.assertTrue(yet_another_invalid_config_with_emergency_buffer not in
                        self.hierarchical_settings)

    def test_generating_random_configs(self):
        used_emergency_buffer = False
        used_color = False
        used_crimson = False

        # Let's seed it to make sure we get consistent test results
        random_state = random.Random()
        random_state.seed(1)
        self.hierarchical_settings.random_state = random_state

        for _ in range(100):
            random_config = self.hierarchical_settings.random()
            self.assertTrue(random_config in self.hierarchical_settings)
            used_emergency_buffer = used_emergency_buffer or random_config[
                'use_emergency_buffer']
            if random_config['use_emergency_buffer']:
                used_color = used_color or random_config[
                    'emergency_buffer_config']['use_colors']
                if random_config['emergency_buffer_config']['use_colors']:
                    used_crimson = used_crimson or (
                        random_config['emergency_buffer_config']
                        ['emergency_buffer_color']['color'] == 'Crimson')
        self.assertTrue(used_emergency_buffer)
        self.assertTrue(used_color)
        self.assertTrue(used_crimson)

    def test_reseeding_random_state(self):
        previous_iteration_first_pass_points = None

        for i in range(10):
            # let's seed the grid for the first time
            random_state = random.Random()
            random_state.seed(i)
            self.hierarchical_settings.random_state = random_state

            first_pass_points = [
                self.hierarchical_settings.random() for _ in range(100)
            ]

            # let's do it again
            random_state = random.Random()
            random_state.seed(i)
            self.hierarchical_settings.random_state = random_state

            second_pass_points = [
                self.hierarchical_settings.random() for _ in range(100)
            ]

            for first_pass_point, second_pass_point in zip(
                    first_pass_points, second_pass_points):
                self.assertTrue(first_pass_point == second_pass_point)

            if previous_iteration_first_pass_points is not None:
                # Let's make sure we keep changing the points
                self.assertTrue(
                    any(previous != current for previous, current in zip(
                        previous_iteration_first_pass_points,
                        first_pass_points)))
            previous_iteration_first_pass_points = first_pass_points
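
# A hedged standalone sketch of the reseeding pattern exercised above:
# assigning a seeded random.Random to a grid's random_state makes .random()
# reproducible across passes (grid and dimension names are illustrative).
demo_space = SimpleHypergrid(
    name='demo',
    dimensions=[DiscreteDimension(name='x', min=0, max=100)]
)
demo_space.random_state = random.Random(42)
first_pass = [demo_space.random() for _ in range(5)]
demo_space.random_state = random.Random(42)
second_pass = [demo_space.random() for _ in range(5)]
assert first_pass == second_pass
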
class OptimizationProblem:
    """Models an instance of an optimization problem.

    An instance of OptimizationProblem can be used to configure a variety of optimizers,
    immediately informing them of what they are working with.

    Many optimization problems contain the same set of elements:
    1. Decision Variables / Search Space - decision variables characterized by their allowed ranges and constraints form a Search Space.
    2. Objectives - one or more values to optimize. Each objective is meant to be either maximized or minimized.
    3. Context - this represents either: 1. controlled variables in an active learning scenario, or 2. context information in an online learning scenario.

    For example, if we are attempting to optimize a smart cache:

    Decision variables:
        * cache implementation (array, hashmap), each implementation's parameters:
            * array: size, associativity, eviction policy
            * hashmap: size, hash function, bucket data structure, bucket size, bucket eviction policy

    Objectives:
        * latency
        * cache memory footprint
        * recomputation cost (average, median, total)
        * hit ratio
        * cache utilization

    Context:
        * workload characteristics:
            * true working set size (only known in active learning scenario)
            * estimated working set size (possibly many estimators, many Confidence Interval sizes)
            * recomputation cost distribution (true or estimated)

        * deployment context:
            * machine characteristics:
                * num cores
                * amount of ram
                * disk type
            * runtime state:
                * cpu utilization
                * ram utilization
                * etc

    Parameters
    ----------
    parameter_space : Hypergrid
        Input parameter space for objective, i.e. the search space.
    objective_space : Hypergrid
        Output space for the objective; can be (-inf, +inf).
    objectives : list[Objective]
        Objective function(s) to optimize, with input from parameter_space and output in objective_space.
    context_space : Hypergrid, default=None
        Additional run-time context features.

    Attributes
    ----------
    feature_space : Hypergrid
        Joint space of parameters and context.
    """

    # The dimensions that we inject to keep track of individual subspaces, but which are worthless
    # for modeling purposes.
    META_DIMENSION_NAMES = {
        "contains_parameters", "contains_context", "contains_objectives"
    }

    def __init__(
        self,
        parameter_space: Hypergrid,
        objective_space: Hypergrid,
        objectives: List[Objective],
        context_space: Hypergrid = None,
    ):
        self.parameter_space = parameter_space
        self.context_space = context_space

        assert not any(
            isinstance(dimension, CategoricalDimension)
            for dimension in objective_space.dimensions
        ), "Objective dimension cannot be Categorical."
        objective_dimension_names = {
            dimension.name
            for dimension in objective_space.dimensions
        }
        assert all(
            objective.name in objective_dimension_names for objective in
            objectives), "All objectives must belong to objective space."
        self.objective_space = objective_space
        # We need to keep track of which objective to minimize, and which one to maximize.
        self.objectives = objectives
        self.objective_names = [
            objective.name for objective in self.objectives
        ]

        # Fit functions / surrogate models will be fed features consisting of both context and parameters.
        # Thus, the feature space is comprised of both context and parameters.
        has_context = self.context_space is not None
        self.feature_space = SimpleHypergrid(
            name="features",
            dimensions=[
                CategoricalDimension(name="contains_context",
                                     values=[has_context])
            ]).join(subgrid=self.parameter_space,
                    on_external_dimension=CategoricalDimension(
                        name="contains_context", values=[has_context]))
        if has_context:
            self.feature_space = self.feature_space.join(
                subgrid=self.context_space,
                on_external_dimension=CategoricalDimension(
                    name="contains_context", values=[True]))

    def construct_feature_dataframe(self,
                                    parameters_df: pd.DataFrame,
                                    context_df: pd.DataFrame = None,
                                    product: bool = False):
        """Construct feature value dataframe from config value and context value dataframes.

        If product is True, creates a cartesian product, otherwise appends columns.

        """
        if (self.context_space is not None) and (context_df is None):
            raise ValueError(
                "Context required by optimization problem but not provided.")

        # prefix column names to adhere to dimensions in hierarchical hypergrid
        #
        features_df = parameters_df.rename(
            lambda x: f"{self.parameter_space.name}.{x}", axis=1)
        if context_df is not None and len(context_df) > 0:
            renamed_context_values = context_df.rename(
                lambda x: f"{self.context_space.name}.{x}", axis=1)
            features_df['contains_context'] = True
            if product:
                renamed_context_values['contains_context'] = True
                features_df = features_df.merge(renamed_context_values,
                                                how='outer',
                                                on='contains_context')
                features_df.index = parameters_df.index.copy()
            else:
                if len(parameters_df) != len(context_df):
                    raise ValueError(
                        f"Incompatible shape of parameters and context: {parameters_df.shape} and {context_df.shape}."
                    )
                features_df = pd.concat([features_df, renamed_context_values],
                                        axis=1)

        else:
            features_df['contains_context'] = False
        return features_df

    def deconstruct_feature_dataframe(
            self,
            features_df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Splits the feature dataframe back into parameters and context dataframes.

        This is a workaround. What we should really do is implement this functionality as a proper operator on Hypergrids.
        """
        parameter_column_names_mapping = {
            f"{self.parameter_space.name}.{dimension_name}": dimension_name
            for dimension_name in self.parameter_space.dimension_names
        }
        existing_parameter_names = [
            parameter_name
            for parameter_name in parameter_column_names_mapping.keys()
            if parameter_name in features_df.columns
        ]
        # Rename on a fresh copy rather than in place, to avoid pandas' SettingWithCopyWarning.
        parameters_df = features_df[existing_parameter_names].rename(
            columns=parameter_column_names_mapping)

        if self.context_space is not None:
            context_column_names_mapping = {
                f"{self.context_space.name}.{dimension_name}": dimension_name
                for dimension_name in self.context_space.dimension_names
            }
            existing_context_column_names = [
                column_name
                for column_name in context_column_names_mapping.keys()
                if column_name in features_df.columns
            ]
            context_df = features_df[existing_context_column_names].rename(
                columns=context_column_names_mapping)
        else:
            context_df = None

        return parameters_df, context_df

    def to_dict(self):
        return {
            "parameter_space": self.parameter_space,
            "context_space": self.context_space,
            "objective_space": self.objective_space,
            "objectives": [objective_to_dict(objective) for objective in self.objectives],
        }
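
# A hedged construction sketch for the smart-cache example in the class
# docstring. Assumes Objective carries a name and a minimize flag (per the
# "which objective to minimize" comment in __init__); all names illustrative.
smart_cache_params = SimpleHypergrid(
    name='smart_cache_config',
    dimensions=[DiscreteDimension(name='cache_size', min=1, max=2**20)]
)
smart_cache_objectives = SimpleHypergrid(
    name='objectives',
    dimensions=[ContinuousDimension(name='latency', min=0, max=math.inf)]
)
smart_cache_problem = OptimizationProblem(
    parameter_space=smart_cache_params,
    objective_space=smart_cache_objectives,
    objectives=[Objective(name='latency', minimize=True)],
)
# smart_cache_problem.feature_space now nests smart_cache_params under the
# 'contains_context' pivot dimension, as built in __init__ above.
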
class NestedPolynomialObjective(ObjectiveFunctionBase):
    """A hierarchical function with multiple nested polynomials.

    The value of num_nested_polynomials controls how many polynomials are created. All polynomials are created according to (nearly identical)
    configs specified by the polynomial_objective_config value. The configs for each polynomial differ only in their random seeds.

    The idea here is to provide a more general version of ThreeLevelQuadratic. In ThreeLevelQuadratic we have three two-dimensional,
    degree two polynomials, and we select between them using the "vertex_height" parameter.

    Here we have num_nested_polynomials functions, with configurable dimensions, degrees, and coefficient of variation. Optimizing this
    synthetic function is analogous to optimizing a component with multiple mutually-exclusive implementations.
    """

    def __init__(self, objective_function_config: Point):
        assert objective_function_config.polynomial_objective_config in PolynomialObjective.CONFIG_SPACE
        ObjectiveFunctionBase.__init__(self, objective_function_config)

        # Let's start building the parameter space for it.
        #
        self._parameter_space = SimpleHypergrid(
            name="domain",
            dimensions=[
                CategoricalDimension(name="polynomial_id", values=[id for id in range(self.objective_function_config.num_nested_polynomials)])
            ]
        )

        polynomial_objective_config = self.objective_function_config.polynomial_objective_config
        self._polynomial_objective_config = polynomial_objective_config
        self._polynomials = []
        # Let's create the required number of polynomials.
        #
        for i in range(self.objective_function_config.num_nested_polynomials):
            polynomial_objective_config.seed += i + 1 # Change the seed so that it's still effective but also reproducible.
            polynomial = PolynomialObjectiveWrapper(polynomial_objective_config, domain_name=f"domain_{i}")
            self._polynomials.append(polynomial)
            self._parameter_space.join(
                subgrid=polynomial.parameter_space,
                on_external_dimension=CategoricalDimension(name="polynomial_id", values=[i])
            )

        self._output_space = SimpleHypergrid(
            name='output_space',
            dimensions=[
                ContinuousDimension(name='y', min=-math.inf, max=math.inf)
            ]
        )

    @property
    def parameter_space(self) -> Hypergrid:
        return self._parameter_space

    @property
    def output_space(self) -> Hypergrid:
        return self._output_space

    def evaluate_point(self, point: Point) -> Point:
        selected_polynomial = self._polynomials[point.polynomial_id]
        return selected_polynomial.evaluate_point(point[f"domain_{point.polynomial_id}"])

    def evaluate_dataframe(self, dataframe: pd.DataFrame) -> pd.DataFrame:
        # For now, evaluate one row at a time; this could be vectorized later.
        values = []
        for i in range(len(dataframe.index)):
            row = dataframe.iloc[[i]]  # positional indexing is robust to non-default indices
            point = Point.from_dataframe(row)
            value = self.evaluate_point(point)
            values.append(value.y)
        return pd.DataFrame({'y': values})


    def get_context(self) -> Point:
        """Returns the config used to create the polynomials.

        Down the road it could return more info about the resulting polynomials.
        """
        return self._polynomial_objective_config
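
# A hedged usage sketch: assumes PolynomialObjective.CONFIG_SPACE is a
# Hypergrid (the containment assert in __init__ implies as much), so .random()
# yields a valid polynomial config, including the seed field mutated above.
nested_config = Point(
    num_nested_polynomials=3,
    polynomial_objective_config=PolynomialObjective.CONFIG_SPACE.random(),
)
nested_objective = NestedPolynomialObjective(nested_config)
params = nested_objective.parameter_space.random()
result = nested_objective.evaluate_point(params)
assert result in nested_objective.output_space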