示例#1
0
文件: _fanova.py 项目: yujiimt/optuna
    def evaluate(self, study: Study,
                 params: Optional[List[str]]) -> Dict[str, float]:
        distributions = _get_distributions(study, params)
        params_data, values_data = _get_study_data(study, distributions)

        evaluator = fANOVA(
            X=params_data,
            Y=values_data,
            config_space=_get_configuration_space(distributions),
            max_features=max(1, int(params_data.shape[1] * 0.7)),
        )

        individual_importances = {}
        for i, name in enumerate(evaluator.cs.get_hyperparameter_names()):
            imp = evaluator.quantify_importance((i, ))
            imp = imp[(i, )]["individual importance"]
            individual_importances[name] = imp

        tot_importance = sum(v for v in individual_importances.values())
        for name in individual_importances.keys():
            individual_importances[name] /= tot_importance

        param_importances = OrderedDict(
            reversed(
                sorted(
                    individual_importances.items(),
                    key=lambda name_and_importance: name_and_importance[1],
                )))
        return param_importances
示例#2
0
    def evaluate(self,
                 study: Study,
                 params: Optional[List[str]] = None) -> Dict[str, float]:
        distributions = _get_distributions(study, params)
        params_data, values_data = _get_study_data(study, distributions)

        if params_data.size == 0:  # `params` were given but as an empty list.
            return OrderedDict()

        params_data, cols_to_raw_cols = _encode_categorical(
            params_data, distributions.values())

        forest = self._forest
        forest.fit(params_data, values_data)
        feature_importances = forest.feature_importances_
        feature_importances_reduced = numpy.zeros(len(distributions))
        numpy.add.at(feature_importances_reduced, cols_to_raw_cols,
                     feature_importances)

        param_importances = OrderedDict()
        param_names = list(distributions.keys())
        for i in feature_importances_reduced.argsort()[::-1]:
            param_importances[
                param_names[i]] = feature_importances_reduced[i].item()

        return param_importances
示例#3
0
    def evaluate(self, study: Study, params: Optional[List[str]] = None) -> Dict[str, float]:
        distributions = _get_distributions(study, params)
        params_data, values_data = _get_study_data(study, distributions)

        if params_data.size == 0:  # `params` were given but as an empty list.
            return OrderedDict()

        # Many (deep) copies of the search spaces are required during the tree traversal and using
        # Optuna distributions will create a bottleneck.
        # Therefore, search spaces (parameter distributions) are represented by a single
        # `numpy.ndarray`, coupled with a list of flags that indicate whether they are categorical
        # or not.
        search_spaces = numpy.empty((len(distributions), 2), dtype=numpy.float64)
        search_spaces_is_categorical = []

        for i, distribution in enumerate(distributions.values()):
            if isinstance(distribution, CategoricalDistribution):
                search_spaces[i, 0] = 0
                search_spaces[i, 1] = len(distribution.choices)
                search_spaces_is_categorical.append(True)
            elif isinstance(
                distribution,
                (
                    DiscreteUniformDistribution,
                    IntLogUniformDistribution,
                    IntUniformDistribution,
                    LogUniformDistribution,
                    UniformDistribution,
                ),
            ):
                search_spaces[i, 0] = distribution.low
                search_spaces[i, 1] = distribution.high
                search_spaces_is_categorical.append(False)
            else:
                assert False

        evaluator = self._evaluator
        evaluator.fit(
            X=params_data,
            y=values_data,
            search_spaces=search_spaces,
            search_spaces_is_categorical=search_spaces_is_categorical,
        )

        importances = {}
        for i, name in enumerate(distributions.keys()):
            importance, _ = evaluator.get_importance((i,))
            importances[name] = importance

        total_importance = sum(importances.values())
        for name in importances.keys():
            importances[name] /= total_importance

        sorted_importances = OrderedDict(
            reversed(
                sorted(importances.items(), key=lambda name_and_importance: name_and_importance[1])
            )
        )
        return sorted_importances