def evaluate(self, study: Study, params: Optional[List[str]] = None) -> Dict[str, float]:
    distributions = _get_distributions(study, params)
    params_data, values_data = _get_study_data(study, distributions)

    evaluator = fANOVA(
        X=params_data,
        Y=values_data,
        config_space=_get_configuration_space(distributions),
        max_features=max(1, int(params_data.shape[1] * 0.7)),
    )

    # Query fANOVA for each parameter's individual (first-order) importance.
    individual_importances = {}
    for i, name in enumerate(evaluator.cs.get_hyperparameter_names()):
        imp = evaluator.quantify_importance((i,))
        imp = imp[(i,)]["individual importance"]
        individual_importances[name] = imp

    # Normalize the importances so that they sum to one.
    tot_importance = sum(individual_importances.values())
    for name in individual_importances.keys():
        individual_importances[name] /= tot_importance

    # Sort in descending order of importance.
    param_importances = OrderedDict(
        reversed(
            sorted(
                individual_importances.items(),
                key=lambda name_and_importance: name_and_importance[1],
            )
        )
    )
    return param_importances
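
# A minimal, self-contained sketch of the normalize-then-sort-descending pattern used
# at the end of the method above, with illustrative values (not taken from a real study).
# Raw importances are scaled to sum to 1 and returned largest-first; `reverse=True` here
# is equivalent to the `reversed(sorted(...))` idiom above.
from collections import OrderedDict

raw = {"x": 0.6, "y": 0.3, "z": 0.1}  # hypothetical raw fANOVA importances
total = sum(raw.values())
normalized = {name: value / total for name, value in raw.items()}
ordered = OrderedDict(sorted(normalized.items(), key=lambda kv: kv[1], reverse=True))
# -> OrderedDict([('x', 0.6), ('y', 0.3), ('z', 0.1)])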
def evaluate(self, study: Study, params: Optional[List[str]] = None) -> Dict[str, float]:
    distributions = _get_distributions(study, params)
    params_data, values_data = _get_study_data(study, distributions)

    if params_data.size == 0:  # `params` were given but as an empty list.
        return OrderedDict()

    # One-hot encode the categorical parameters. `cols_to_raw_cols` maps each encoded
    # column back to the index of the original parameter it was derived from.
    params_data, cols_to_raw_cols = _encode_categorical(params_data, distributions.values())

    forest = self._forest
    forest.fit(params_data, values_data)
    feature_importances = forest.feature_importances_

    # Fold the importances of one-hot encoded columns back into a single importance
    # per original parameter.
    feature_importances_reduced = numpy.zeros(len(distributions))
    numpy.add.at(feature_importances_reduced, cols_to_raw_cols, feature_importances)

    # Sort in descending order of importance.
    param_importances = OrderedDict()
    param_names = list(distributions.keys())
    for i in feature_importances_reduced.argsort()[::-1]:
        param_importances[param_names[i]] = feature_importances_reduced[i].item()

    return param_importances
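
# An illustration of the reduction step above. `_encode_categorical` is not shown in
# this file; the mapping below is a hypothetical example of its output: three encoded
# columns, where columns 1 and 2 are the one-hot expansion of the same original
# (categorical) parameter 1. `numpy.add.at` performs an unbuffered in-place add, so
# repeated indices accumulate rather than overwrite.
import numpy

feature_importances = numpy.array([0.5, 0.2, 0.3])  # importance per encoded column
cols_to_raw_cols = numpy.array([0, 1, 1])  # encoded column -> original parameter index

feature_importances_reduced = numpy.zeros(2)
numpy.add.at(feature_importances_reduced, cols_to_raw_cols, feature_importances)
# -> array([0.5, 0.5]): the one-hot columns' importances are summed per parameter.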
def evaluate(self, study: Study, params: Optional[List[str]] = None) -> Dict[str, float]:
    distributions = _get_distributions(study, params)
    params_data, values_data = _get_study_data(study, distributions)

    if params_data.size == 0:  # `params` were given but as an empty list.
        return OrderedDict()

    # Many (deep) copies of the search spaces are required during the tree traversal and using
    # Optuna distributions will create a bottleneck.
    # Therefore, search spaces (parameter distributions) are represented by a single
    # `numpy.ndarray`, coupled with a list of flags that indicate whether they are categorical
    # or not.
    search_spaces = numpy.empty((len(distributions), 2), dtype=numpy.float64)
    search_spaces_is_categorical = []

    for i, distribution in enumerate(distributions.values()):
        if isinstance(distribution, CategoricalDistribution):
            search_spaces[i, 0] = 0
            search_spaces[i, 1] = len(distribution.choices)
            search_spaces_is_categorical.append(True)
        elif isinstance(
            distribution,
            (
                DiscreteUniformDistribution,
                IntLogUniformDistribution,
                IntUniformDistribution,
                LogUniformDistribution,
                UniformDistribution,
            ),
        ):
            search_spaces[i, 0] = distribution.low
            search_spaces[i, 1] = distribution.high
            search_spaces_is_categorical.append(False)
        else:
            assert False

    evaluator = self._evaluator
    evaluator.fit(
        X=params_data,
        y=values_data,
        search_spaces=search_spaces,
        search_spaces_is_categorical=search_spaces_is_categorical,
    )

    importances = {}
    for i, name in enumerate(distributions.keys()):
        importance, _ = evaluator.get_importance((i,))
        importances[name] = importance

    total_importance = sum(importances.values())
    for name in importances.keys():
        importances[name] /= total_importance

    sorted_importances = OrderedDict(
        reversed(
            sorted(importances.items(), key=lambda name_and_importance: name_and_importance[1])
        )
    )
    return sorted_importances
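
# A usage sketch, assuming these `evaluate` methods live on Optuna importance evaluator
# classes (e.g. `optuna.importance.FanovaImportanceEvaluator`). The objective below is a
# made-up example; the public entry point `optuna.importance.get_param_importances`
# delegates to the given evaluator's `evaluate`.
import optuna

def objective(trial):
    x = trial.suggest_uniform("x", -10, 10)
    y = trial.suggest_int("y", 0, 10)
    return x ** 2 + y

study = optuna.create_study()
study.optimize(objective, n_trials=100)

importances = optuna.importance.get_param_importances(
    study, evaluator=optuna.importance.FanovaImportanceEvaluator()
)
# `importances` is an OrderedDict mapping parameter names to normalized importances,
# sorted in descending order, e.g. OrderedDict([('x', 0.9...), ('y', 0.0...)]).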