def _get_hyperparameter_search_space_pipeline_step(self, ps, include=None, dataset_properties=None):
    """Build the ConfigurationSpace for one pipeline step.

    A ``__choice__`` categorical hyperparameter selects among the step's
    nodes; each node's own search space is attached as a conditional
    sub-space that is only active when that node is chosen.

    Args:
        ps: Pipeline step object; ``ps.get_nodes()`` supplies the candidate
            nodes when ``include`` is not given.
        include: Optional explicit list of nodes to restrict the choice to.
        dataset_properties: Optional dict describing the dataset, forwarded
            to each node's ``get_hyperparameter_search_space``.

    Returns:
        ConfigurationSpace for this pipeline step.
    """
    # Bug fix: the default used to be the mutable literal `{}`, which is
    # shared across calls and could leak state if any callee mutated it.
    if dataset_properties is None:
        dataset_properties = {}
    if include is not None:
        nodes = include
    else:
        nodes = ps.get_nodes()
    cs = ConfigurationSpace()
    choice = cs.add_hyperparameter(
        CategoricalHyperparameter('__choice__',
                                  [node.get_name() for node in nodes]))
    for node in nodes:
        sub_cs = node.get_hyperparameter_search_space(
            dataset_properties=dataset_properties)
        # Activate the node's sub-space only when the node is selected.
        parent_hyperparameter = {
            'parent': choice,
            'value': node.get_name()
        }
        cs.add_configuration_space(
            node.get_name(), sub_cs,
            parent_hyperparameter=parent_hyperparameter)
    return cs
def get_hyperparameter_search_space(
    self,
    dataset_properties: Optional[Dict[str, str]] = None,
    default: Optional[str] = None,
    include: Optional[List[str]] = None,
    exclude: Optional[List[str]] = None,
) -> ConfigurationSpace:
    """Returns the configuration space of the current chosen components

    Args:
        dataset_properties (Optional[Dict[str, str]]): Describes the dataset to work on
        default (Optional[str]): Default component to use
        include: Optional[Dict[str, Any]]: what components to include. It is an exhaustive
            list, and will exclusively use this components.
        exclude: Optional[Dict[str, Any]]: which components to skip

    Returns:
        ConfigurationSpace: the configuration space of the hyper-parameters of the
             chosen component
    """
    cs = ConfigurationSpace()
    if dataset_properties is None:
        dataset_properties = {}

    # Compile a list of legal preprocessors for this problem
    available_initializers = self.get_available_components(
        dataset_properties=dataset_properties,
        include=include,
        exclude=exclude)

    if len(available_initializers) == 0:
        raise ValueError("No initializers found")

    # Fall back to a preferred default when the caller did not pick one.
    if default is None:
        defaults = [
            'MLPNet',
        ]
        for default_ in defaults:
            if default_ in available_initializers:
                default = default_
                break

    # Top-level categorical that selects which component is active.
    initializer = CSH.CategoricalHyperparameter(
        '__choice__',
        list(available_initializers.keys()),
        default_value=default)
    cs.add_hyperparameter(initializer)
    for name in available_initializers:
        initializer_configuration_space = available_initializers[name]. \
            get_hyperparameter_search_space(dataset_properties)
        # Each component's sub-space is only active when that component is chosen.
        parent_hyperparameter = {
            'parent': initializer,
            'value': name
        }
        cs.add_configuration_space(
            name,
            initializer_configuration_space,
            parent_hyperparameter=parent_hyperparameter)
    # Cache the built space and the properties it was built for
    # (trailing underscore: fitted/derived attribute convention).
    self.configuration_space_ = cs
    self.dataset_properties_ = dataset_properties
    return cs
def get_hyperparameter_search_space(
    self,
    dataset_properties: Optional[Dict[str, Any]] = None,
    default: Optional[str] = None,
    include: Optional[List[str]] = None,
    exclude: Optional[List[str]] = None) -> ConfigurationSpace:
    """Build the search space over the available categorical encoders.

    A ``__choice__`` hyperparameter selects the encoder; each encoder's own
    sub-space is added conditionally on being selected. For purely numerical
    datasets only ``NoEncoder`` is offered.

    Args:
        dataset_properties: Dataset description; merged over
            ``self.dataset_properties`` (caller-supplied keys win).
        default: Encoder to use as default choice; auto-picked if None.
        include: Exhaustive whitelist of encoder names.
        exclude: Encoder names to skip.

    Returns:
        ConfigurationSpace with the encoder choice and conditional sub-spaces.
    """
    cs = ConfigurationSpace()

    if dataset_properties is None:
        dataset_properties = dict()
    # Caller-provided properties override the instance-level ones.
    dataset_properties = {**self.dataset_properties, **dataset_properties}

    available_preprocessors = self.get_available_components(
        dataset_properties=dataset_properties,
        include=include,
        exclude=exclude)

    if len(available_preprocessors) == 0:
        raise ValueError("no encoders found, please add a encoder")

    if default is None:
        # Preference order; skip candidates ruled out by include/exclude.
        defaults = ['OneHotEncoder', 'OrdinalEncoder', 'NoEncoder']
        for default_ in defaults:
            if default_ in available_preprocessors:
                if include is not None and default_ not in include:
                    continue
                if exclude is not None and default_ in exclude:
                    continue
                default = default_
                break

    # add only no encoder to choice hyperparameters in case the dataset is only numerical
    if len(dataset_properties['categorical_columns']) == 0:
        default = 'NoEncoder'
        preprocessor = CSH.CategoricalHyperparameter('__choice__',
                                                     ['NoEncoder'],
                                                     default_value=default)
    else:
        preprocessor = CSH.CategoricalHyperparameter(
            '__choice__',
            list(available_preprocessors.keys()),
            default_value=default)
    cs.add_hyperparameter(preprocessor)

    # add only child hyperparameters of early_preprocessor choices
    for name in preprocessor.choices:
        preprocessor_configuration_space = available_preprocessors[name].\
            get_hyperparameter_search_space(dataset_properties)
        parent_hyperparameter = {'parent': preprocessor, 'value': name}
        cs.add_configuration_space(name, preprocessor_configuration_space,
                                   parent_hyperparameter=parent_hyperparameter)
    # Cache the result for later inspection by the pipeline.
    self.configuration_space = cs
    self.dataset_properties = dataset_properties
    return cs
def get_hyperparameter_search_space(self,
                                    dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None,
                                    default: Optional[str] = None,
                                    include: Optional[List[str]] = None,
                                    exclude: Optional[List[str]] = None) -> ConfigurationSpace:
    """Build the search space over the available image normalizers.

    A ``__choice__`` hyperparameter selects the normalizer; each candidate's
    sub-space is attached conditionally. Caller-registered search-space
    updates for ``__choice__`` override both the candidate list and default.

    Args:
        dataset_properties: Dataset description; merged over
            ``self.dataset_properties`` (caller-supplied keys win).
        default: Normalizer used as default choice; auto-picked if None.
        include: Exhaustive whitelist of normalizer names.
        exclude: Normalizer names to skip.

    Returns:
        ConfigurationSpace with the normalizer choice and its sub-spaces.
    """
    cs = ConfigurationSpace()
    if dataset_properties is None:
        dataset_properties = dict()
    dataset_properties = {**self.dataset_properties, **dataset_properties}
    available_preprocessors = self.get_available_components(dataset_properties=dataset_properties,
                                                            include=include,
                                                            exclude=exclude)
    if len(available_preprocessors) == 0:
        raise ValueError("no image normalizers found, please add an image normalizer")
    if default is None:
        # Preference order; skip candidates ruled out by include/exclude.
        defaults = ['ImageNormalizer', 'NoNormalizer']
        for default_ in defaults:
            if default_ in available_preprocessors:
                if include is not None and default_ not in include:
                    continue
                if exclude is not None and default_ in exclude:
                    continue
                default = default_
                break

    # A '__choice__' update (if registered) constrains the candidate set;
    # it must be a subset of what is actually available.
    updates = self._get_search_space_updates()
    if '__choice__' in updates.keys():
        choice_hyperparameter = updates['__choice__']
        if not set(choice_hyperparameter.value_range).issubset(available_preprocessors):
            raise ValueError("Expected given update for {} to have "
                             "choices in {} got {}".format(self.__class__.__name__,
                                                           available_preprocessors,
                                                           choice_hyperparameter.value_range))
        preprocessor = CSH.CategoricalHyperparameter('__choice__',
                                                     choice_hyperparameter.value_range,
                                                     default_value=choice_hyperparameter.default_value)
    else:
        preprocessor = CSH.CategoricalHyperparameter('__choice__',
                                                     list(available_preprocessors.keys()),
                                                     default_value=default)
    cs.add_hyperparameter(preprocessor)

    # add only child hyperparameters of preprocessor choices
    for name in preprocessor.choices:
        preprocessor_configuration_space = available_preprocessors[name].\
            get_hyperparameter_search_space(dataset_properties)
        parent_hyperparameter = {'parent': preprocessor, 'value': name}
        cs.add_configuration_space(name, preprocessor_configuration_space,
                                   parent_hyperparameter=parent_hyperparameter)
    # Cache the result for later inspection by the pipeline.
    self.configuration_space = cs
    self.dataset_properties = dataset_properties
    return cs
def load_default_configspace(primitive):
    """Return a ConfigurationSpace seeded from meta-learning data for ``primitive``.

    When no meta-learned hyperparameters are recorded for the primitive, the
    returned space is simply empty.
    """
    cs = ConfigurationSpace()
    if primitive in get_hyperparameters_from_metalearnig():
        # Translate the meta-learned hyperparameter set into a sub-space and
        # mount it under the primitive's name, using '|' as the delimiter.
        metalearned = get_hyperparameters_from_metalearnig()[primitive]
        cs.add_configuration_space(
            primitive,
            get_configspace_from_metalearning(metalearned),
            '|')
    return cs
def get_hyperparameter_search_space(self, dataset_properties=None,
                                    default=None,
                                    include=None,
                                    exclude=None):
    """Build the search space over the available estimators.

    A ``__choice__`` hyperparameter selects the estimator; each estimator's
    own sub-space is attached conditionally on being chosen.

    Args:
        dataset_properties: Optional dict describing the dataset.
        default: Estimator name to use as default; auto-picked if None.
        include: Exhaustive whitelist of estimator names (mutually
            exclusive with ``exclude``).
        exclude: Estimator names to skip.

    Returns:
        ConfigurationSpace with the estimator choice and its sub-spaces.

    Raises:
        ValueError: If both include and exclude are given, or no estimator
            matches the filters.
    """
    if dataset_properties is None:
        dataset_properties = {}
    if include is not None and exclude is not None:
        raise ValueError("The arguments include and "
                         "exclude cannot be used together.")
    cs = ConfigurationSpace()
    # Compile a list of all estimator objects for this problem
    available_estimators = self.get_available_components(
        dataset_properties=dataset_properties,
        include=include,
        exclude=exclude)
    if len(available_estimators) == 0:
        raise ValueError("No classifiers found")
    if default is None:
        # Preferred defaults first, then anything available as fallback.
        defaults = ['random_forest', 'liblinear_svc', 'sgd', 'libsvm_svc'
                    ] + list(available_estimators.keys())
        for default_ in defaults:
            if default_ in available_estimators:
                if include is not None and default_ not in include:
                    continue
                if exclude is not None and default_ in exclude:
                    continue
                default = default_
                break
    estimator = CategoricalHyperparameter('__choice__', list(
        available_estimators.keys()), default_value=default)
    cs.add_hyperparameter(estimator)
    for estimator_name in available_estimators.keys():
        estimator_configuration_space = available_estimators[estimator_name].\
            get_hyperparameter_search_space(dataset_properties)
        # Sub-space is active only when this estimator is selected.
        parent_hyperparameter = {
            'parent': estimator,
            'value': estimator_name
        }
        cs.add_configuration_space(
            estimator_name,
            estimator_configuration_space,
            parent_hyperparameter=parent_hyperparameter)
    # Cache the result for later inspection by the pipeline.
    self.configuration_space = cs
    self.dataset_properties = dataset_properties
    return cs
def get_hyperparameter_search_space(self, dataset_properties=None,
                                    default=None,
                                    include=None,
                                    exclude=None):
    """Build the search space over the available estimators.

    Variant that stores results in trailing-underscore attributes
    (``configuration_space_`` / ``dataset_properties_``), following the
    fitted-attribute naming convention.

    Args:
        dataset_properties: Optional dict describing the dataset.
        default: Estimator name to use as default; auto-picked if None.
        include: Exhaustive whitelist of estimator names (mutually
            exclusive with ``exclude``).
        exclude: Estimator names to skip.

    Returns:
        ConfigurationSpace with the estimator choice and its sub-spaces.

    Raises:
        ValueError: If both include and exclude are given, or no estimator
            matches the filters.
    """
    if dataset_properties is None:
        dataset_properties = {}
    if include is not None and exclude is not None:
        raise ValueError("The arguments include_estimators and "
                         "exclude_estimators cannot be used together.")
    cs = ConfigurationSpace()
    # Compile a list of all estimator objects for this problem
    available_estimators = self.get_available_components(
        dataset_properties=dataset_properties,
        include=include,
        exclude=exclude)
    if len(available_estimators) == 0:
        raise ValueError("No classifiers found")
    if default is None:
        # Preferred defaults first, then anything available as fallback.
        defaults = ['random_forest', 'liblinear_svc', 'sgd',
                    'libsvm_svc'] + list(available_estimators.keys())
        for default_ in defaults:
            if default_ in available_estimators:
                if include is not None and default_ not in include:
                    continue
                if exclude is not None and default_ in exclude:
                    continue
                default = default_
                break
    estimator = CategoricalHyperparameter('__choice__',
                                          list(available_estimators.keys()),
                                          default_value=default)
    cs.add_hyperparameter(estimator)
    for estimator_name in available_estimators.keys():
        estimator_configuration_space = available_estimators[estimator_name].\
            get_hyperparameter_search_space(dataset_properties)
        # Sub-space is active only when this estimator is selected.
        parent_hyperparameter = {'parent': estimator,
                                 'value': estimator_name}
        cs.add_configuration_space(estimator_name,
                                   estimator_configuration_space,
                                   parent_hyperparameter=parent_hyperparameter)
    # Cache the built space (trailing underscore: derived attribute).
    self.configuration_space_ = cs
    self.dataset_properties_ = dataset_properties
    return cs
def _get_hyperparameter_search_space_recursevely(
    dataset_properties: DATASET_PROPERTIES_TYPE,
    cs: ConfigurationSpace,
    transformer: BaseEstimator,
) -> ConfigurationSpace:
    """Recursively add the search spaces of nested (name, operation) pairs to ``cs``.

    ``transformer`` is iterated as (step_name, step_operation) pairs. Steps
    that expose ``get_hyperparameter_search_space`` contribute a sub-space
    directly; other steps are presumed to be nested containers of further
    pairs and are descended into.

    Args:
        dataset_properties: Dataset description forwarded to each step.
        cs: ConfigurationSpace mutated in place (also returned).
        transformer: Iterable of (name, operation) pairs.

    Returns:
        The same ``cs``, with all discovered sub-spaces added.
    """
    for st_name, st_operation in transformer:
        if hasattr(st_operation, "get_hyperparameter_search_space"):
            cs.add_configuration_space(
                st_name,
                st_operation.get_hyperparameter_search_space(dataset_properties))
        else:
            # Bug fix: this used to `return` the recursive call immediately,
            # silently dropping every (name, operation) pair that followed a
            # nested transformer. Recurse and keep iterating instead; `cs`
            # is mutated in place so no return value is needed here.
            FeatTypeSplit._get_hyperparameter_search_space_recursevely(
                dataset_properties, cs, st_operation)
    return cs
def get_hyperparameter_search_space(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
                                    ) -> ConfigurationSpace:
    """Assemble one ConfigurationSpace containing every available image
    augmenter's sub-space.

    Unlike the choice-style builders, there is no ``__choice__``
    hyperparameter: all augmenters are included unconditionally.

    Args:
        dataset_properties: Optional dataset description forwarded to each
            augmenter; defaults to an empty dict.

    Returns:
        ConfigurationSpace aggregating all augmenter sub-spaces.
    """
    cs = ConfigurationSpace()
    augmenters: Dict[str, BaseImageAugmenter] = get_components()
    if dataset_properties is None:
        dataset_properties = {}

    # Mount each augmenter's sub-space under its own name.
    for augmenter_name, augmenter in augmenters.items():
        sub_space = augmenter.get_hyperparameter_search_space(dataset_properties)
        cs.add_configuration_space(augmenter_name, sub_space)
    return cs
def get_hyperparameter_search_space(
    self,
    dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
    default: Optional[str] = None,
    include: Optional[Dict[str, str]] = None,
    exclude: Optional[Dict[str, str]] = None,
) -> ConfigurationSpace:
    """Build the search space over the available one-hot-encoding components.

    A ``__choice__`` hyperparameter selects the component; each component's
    sub-space is attached conditionally on being chosen.

    Args:
        dataset_properties: Optional dataset description.
        default: Component used as default choice; auto-picked if None.
        include: Exhaustive whitelist of component names.
        exclude: Component names to skip.

    Returns:
        ConfigurationSpace with the encoder choice and its sub-spaces.
    """
    cs = ConfigurationSpace()
    if dataset_properties is None:
        dataset_properties = {}

    # Legal components for this problem, honouring the include/exclude filters.
    components = self.get_available_components(
        dataset_properties=dataset_properties,
        include=include,
        exclude=exclude)

    if not components:
        raise ValueError(
            "No ohe hot encoders found, please add any one hot encoder "
            "component.")

    if default is None:
        # First available candidate from the preference order wins.
        for candidate in ('one_hot_encoding', 'no_encoding'):
            if candidate in components:
                default = candidate
                break

    choice = CategoricalHyperparameter(
        '__choice__', list(components.keys()), default_value=default)
    cs.add_hyperparameter(choice)

    # Each component's sub-space is only active when that component is chosen.
    for component_name, component in components.items():
        sub_space = component.get_hyperparameter_search_space(dataset_properties)
        cs.add_configuration_space(
            component_name,
            sub_space,
            parent_hyperparameter={'parent': choice, 'value': component_name})

    # Cache the result for later inspection by the pipeline.
    self.configuration_space = cs
    self.dataset_properties = dataset_properties
    return cs
def get_hyperparameter_search_space(dataset_properties: Optional[Dict[str, str]] = None,
                                    **kwargs: Any) -> ConfigurationSpace:
    """Build a joint search space over network backbones and heads.

    Two independent categorical hyperparameters, ``backbone`` and ``head``,
    are added; each candidate's own sub-space is attached conditionally on
    that candidate being chosen.

    Args:
        dataset_properties: Optional dataset description. If it contains
            ``task_type``, backbones/heads are filtered to those supporting
            that task.
        **kwargs: Accepted for interface compatibility; unused here.

    Returns:
        ConfigurationSpace with backbone and head choices plus sub-spaces.

    Raises:
        ValueError: If no backbone or no head supports the requested task.
    """
    cs = ConfigurationSpace()

    backbones: Dict[str, Type[BaseBackbone]] = get_available_backbones()
    heads: Dict[str, Type[BaseHead]] = get_available_heads()

    # filter backbones and heads for those who support the current task type
    if dataset_properties is not None and "task_type" in dataset_properties:
        task = dataset_properties["task_type"]
        backbones = {name: backbone for name, backbone in backbones.items()
                     if task in backbone.supported_tasks}
        heads = {name: head for name, head in heads.items()
                 if task in head.supported_tasks}

    # Fail fast with a clear message instead of letting CategoricalHyperparameter
    # fail on an empty choices sequence (consistent with sibling choice modules).
    if len(backbones) == 0:
        raise ValueError("No valid backbone found")
    if len(heads) == 0:
        raise ValueError("No valid head found")

    # Materialize dict_keys into lists: ConfigSpace expects an indexable
    # sequence of choices.
    backbone_hp = CategoricalHyperparameter("backbone",
                                            choices=list(backbones.keys()))
    head_hp = CategoricalHyperparameter("head", choices=list(heads.keys()))
    cs.add_hyperparameters([backbone_hp, head_hp])

    # for each backbone and head, add a conditional search space if this backbone or head is chosen
    for backbone_name in backbones.keys():
        backbone_cs = backbones[backbone_name].get_hyperparameter_search_space(
            dataset_properties)
        cs.add_configuration_space(backbone_name,
                                   backbone_cs,
                                   parent_hyperparameter={
                                       "parent": backbone_hp,
                                       "value": backbone_name
                                   })

    for head_name in heads.keys():
        head_cs: ConfigurationSpace = heads[
            head_name].get_hyperparameter_search_space(dataset_properties)
        cs.add_configuration_space(head_name,
                                   head_cs,
                                   parent_hyperparameter={
                                       "parent": head_hp,
                                       "value": head_name
                                   })
    return cs
def get_hyperparameter_search_space(self, dataset_properties=None,
                                    default=None,
                                    include=None,
                                    exclude=None):
    """Build the search space over the available preprocessors.

    A ``__choice__`` hyperparameter selects the preprocessor; each
    preprocessor's sub-space is attached conditionally on being chosen.
    Results are cached in trailing-underscore attributes.

    Args:
        dataset_properties: Optional dict describing the dataset.
        default: Preprocessor used as default choice; auto-picked if None.
        include: Exhaustive whitelist of preprocessor names.
        exclude: Preprocessor names to skip.

    Returns:
        ConfigurationSpace with the preprocessor choice and its sub-spaces.
    """
    cs = ConfigurationSpace()
    if dataset_properties is None:
        dataset_properties = {}

    # Compile a list of legal preprocessors for this problem
    available_preprocessors = self.get_available_components(
        dataset_properties=dataset_properties,
        include=include,
        exclude=exclude)

    if len(available_preprocessors) == 0:
        raise ValueError(
            "No preprocessors found, please add NoPreprocessing")

    if default is None:
        # First available candidate from the preference order wins.
        defaults = [
            'no_preprocessing', 'select_percentile', 'pca', 'truncatedSVD'
        ]
        for default_ in defaults:
            if default_ in available_preprocessors:
                default = default_
                break

    preprocessor = CategoricalHyperparameter(
        '__choice__', list(available_preprocessors.keys()),
        default_value=default)
    cs.add_hyperparameter(preprocessor)
    for name in available_preprocessors:
        preprocessor_configuration_space = available_preprocessors[name]. \
            get_hyperparameter_search_space(dataset_properties)
        # Sub-space active only when this preprocessor is selected.
        parent_hyperparameter = {'parent': preprocessor, 'value': name}
        cs.add_configuration_space(
            name,
            preprocessor_configuration_space,
            parent_hyperparameter=parent_hyperparameter)

    # Cache the built space (trailing underscore: derived attribute).
    self.configuration_space_ = cs
    self.dataset_properties_ = dataset_properties
    return cs
def get_hyperparameter_search_space(self, dataset_properties=None,
                                    default=None,
                                    include=None,
                                    exclude=None):
    """Build the search space over the available rescaling components.

    A ``__choice__`` hyperparameter selects the rescaler; each rescaler's
    sub-space is attached conditionally on being chosen.

    Args:
        dataset_properties: Optional dict describing the dataset.
        default: Rescaler used as default choice; auto-picked if None.
        include: Exhaustive whitelist of rescaler names.
        exclude: Rescaler names to skip.

    Returns:
        ConfigurationSpace with the rescaler choice and its sub-spaces.
    """
    cs = ConfigurationSpace()
    if dataset_properties is None:
        dataset_properties = {}

    # Legal components for this problem, honouring include/exclude filters.
    components = self.get_available_components(
        dataset_properties=dataset_properties,
        include=include,
        exclude=exclude)

    if not components:
        raise ValueError(
            "No rescalers found, please add any rescaling component.")

    if default is None:
        # First available candidate from the preference order wins.
        for candidate in ('standardize', 'none', 'maxabs', 'minmax',
                          'normalize'):
            if candidate in components:
                default = candidate
                break

    choice = CategoricalHyperparameter(
        '__choice__', list(components.keys()), default_value=default)
    cs.add_hyperparameter(choice)

    # Each component's sub-space is only active when that component is chosen.
    for component_name, component in components.items():
        sub_space = component.get_hyperparameter_search_space(dataset_properties)
        cs.add_configuration_space(
            component_name,
            sub_space,
            parent_hyperparameter={'parent': choice, 'value': component_name})

    # Cache the result for later inspection by the pipeline.
    self.configuration_space = cs
    self.dataset_properties = dataset_properties
    return cs
def get_hyperparameter_search_space(
    self,
    dataset_properties: Optional[Dict] = None,
    default: Optional[str] = None,
    include: Optional[Dict] = None,
    exclude: Optional[Dict] = None) -> ConfigurationSpace:
    """Build the search space over the available preprocessors.

    Unlike the sibling builders, this variant *instantiates* each component
    (passing ``dataset_properties`` to its constructor) before querying its
    sub-space, and does not cache results on ``self``.

    Args:
        dataset_properties: Optional dict describing the dataset.
        default: Preprocessor used as default choice; auto-picked if None.
        include: Exhaustive whitelist of preprocessor names.
        exclude: Preprocessor names to skip.

    Returns:
        ConfigurationSpace with the preprocessor choice and its sub-spaces.
    """
    cs = ConfigurationSpace()
    if dataset_properties is None:
        dataset_properties = {}

    # Compile a list of legal preprocessors for this problem
    available_preprocessors = self.get_available_components(
        dataset_properties=dataset_properties,
        include=include,
        exclude=exclude)

    if len(available_preprocessors) == 0:
        raise ValueError(
            "No preprocessors found, please add NoPreprocessing")

    if default is None:
        defaults = ["feature_type"]
        for default_ in defaults:
            if default_ in available_preprocessors:
                default = default_
                break

    preprocessor = CategoricalHyperparameter(
        '__choice__', list(available_preprocessors.keys()),
        default_value=default)
    cs.add_hyperparameter(preprocessor)
    for name in available_preprocessors:
        # Components are classes here: construct with dataset_properties,
        # then ask the instance for its search space.
        preprocessor_configuration_space = available_preprocessors[name](
            dataset_properties=dataset_properties). \
            get_hyperparameter_search_space(dataset_properties)
        parent_hyperparameter = {'parent': preprocessor, 'value': name}
        cs.add_configuration_space(
            name,
            preprocessor_configuration_space,
            parent_hyperparameter=parent_hyperparameter)
    return cs
def get_hyperparameter_search_space(self, dataset_properties=None,
                                    default=None,
                                    include=None,
                                    exclude=None):
    """Build the search space over the available preprocessors.

    A ``__choice__`` hyperparameter selects the preprocessor; each
    preprocessor's sub-space is attached conditionally on being chosen.
    Results are cached in trailing-underscore attributes.

    Args:
        dataset_properties: Optional dict describing the dataset.
        default: Preprocessor used as default choice; auto-picked if None.
        include: Exhaustive whitelist of preprocessor names.
        exclude: Preprocessor names to skip.

    Returns:
        ConfigurationSpace with the preprocessor choice and its sub-spaces.
    """
    cs = ConfigurationSpace()
    if dataset_properties is None:
        dataset_properties = {}

    # Compile a list of legal preprocessors for this problem
    available_preprocessors = self.get_available_components(
        dataset_properties=dataset_properties,
        include=include,
        exclude=exclude)

    if len(available_preprocessors) == 0:
        raise ValueError(
            "No preprocessors found, please add NoPreprocessing")

    if default is None:
        # First available candidate from the preference order wins.
        defaults = ['no_preprocessing', 'select_percentile', 'pca',
                    'truncatedSVD']
        for default_ in defaults:
            if default_ in available_preprocessors:
                default = default_
                break

    preprocessor = CategoricalHyperparameter('__choice__', list(
        available_preprocessors.keys()), default_value=default)
    cs.add_hyperparameter(preprocessor)
    for name in available_preprocessors:
        preprocessor_configuration_space = available_preprocessors[name]. \
            get_hyperparameter_search_space(dataset_properties)
        # Sub-space active only when this preprocessor is selected.
        parent_hyperparameter = {'parent': preprocessor, 'value': name}
        cs.add_configuration_space(name, preprocessor_configuration_space,
                                   parent_hyperparameter=parent_hyperparameter)

    # Cache the built space (trailing underscore: derived attribute).
    self.configuration_space_ = cs
    self.dataset_properties_ = dataset_properties
    return cs
def _get_base_search_space(
    self,
    cs: ConfigurationSpace,
    dataset_properties: Dict[str, Any],
    include: Optional[Dict[str, Any]],
    exclude: Optional[Dict[str, Any]],
    pipeline: List[Tuple[str, autoPyTorchChoice]]
) -> ConfigurationSpace:
    """Populate ``cs`` with the search space of an entire pipeline.

    Validates include/exclude keys against the pipeline step names, computes
    the legal-combination match array, mounts each step's (possibly choice)
    sub-space, and finally adds forbidden clauses for illegal combinations.

    Args:
        cs: ConfigurationSpace mutated in place (and returned, possibly
            replaced by ``add_forbidden``'s result).
        dataset_properties: Dataset description forwarded to every step.
        include: Per-step component whitelist; falls back to ``self.include``.
        exclude: Per-step component blacklist; falls back to ``self.exclude``.
        pipeline: Ordered (name, node) pairs forming the pipeline.

    Returns:
        The populated ConfigurationSpace.

    Raises:
        ValueError: If include/exclude mention a key that is not a pipeline
            step name.
    """
    # Resolve the effective include filter: explicit arg > instance > empty.
    if include is None:
        if self.include is None:
            include = {}
        else:
            include = self.include

    keys = [pair[0] for pair in pipeline]
    for key in include:
        if key not in keys:
            raise ValueError('Invalid key in include: %s; should be one '
                             'of %s' % (key, keys))

    # Resolve the effective exclude filter the same way.
    if exclude is None:
        if self.exclude is None:
            exclude = {}
        else:
            exclude = self.exclude

    keys = [pair[0] for pair in pipeline]
    for key in exclude:
        if key not in keys:
            raise ValueError('Invalid key in exclude: %s; should be one '
                             'of %s' % (key, keys))

    # Binary array marking which component combinations are jointly legal.
    matches = get_match_array(
        pipeline, dataset_properties, include=include, exclude=exclude)

    # Now we have only legal combinations at this step of the pipeline
    # Simple sanity checks
    assert np.sum(matches) != 0, "No valid pipeline found."

    assert np.sum(matches) <= np.size(matches), \
        "'matches' is not binary; %s <= %d, %s" % \
        (str(np.sum(matches)), np.size(matches), str(matches.shape))

    # Iterate each dimension of the matches array (each step of the
    # pipeline) to see if we can add a hyperparameter for that step
    for node_idx, n_ in enumerate(pipeline):
        node_name, node = n_

        is_choice = isinstance(node, autoPyTorchChoice)

        # if the node isn't a choice we can add it immediately because it
        # must be active (if it wasn't, np.sum(matches) would be zero
        if not is_choice:
            cs.add_configuration_space(
                node_name,
                node.get_hyperparameter_search_space(dataset_properties),
            )
        # If the node is a choice, we have to figure out which of its
        # choices are actually legal choices
        else:
            choices_list = find_active_choices(
                matches, node, node_idx,
                dataset_properties,
                include.get(node_name),
                exclude.get(node_name)
            )
            sub_config_space = node.get_hyperparameter_search_space(
                dataset_properties, include=choices_list)
            cs.add_configuration_space(node_name, sub_config_space)

    # And now add forbidden parameter configurations
    # According to matches
    if np.sum(matches) < np.size(matches):
        cs = add_forbidden(
            conf_space=cs, pipeline=pipeline, matches=matches,
            dataset_properties=dataset_properties, include=include,
            exclude=exclude)

    return cs
def get_hyperparameter_search_space(
    self,
    dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None,
    default: Optional[str] = None,
    include: Optional[List[str]] = None,
    exclude: Optional[List[str]] = None,
) -> ConfigurationSpace:
    """Returns the configuration space of the current chosen components

    Args:
        dataset_properties (Optional[Dict[str, BaseDatasetPropertiesType]]):
            Describes the dataset to work on
        default (Optional[str]): Default embedding to use
        include: Optional[Dict[str, Any]]: what components to include. It is an
            exhaustive list, and will exclusively use this components.
        exclude: Optional[Dict[str, Any]]: which components to skip

    Returns:
        ConfigurationSpace: the configuration space of the hyper-parameters of the
            chosen component
    """
    cs = ConfigurationSpace()
    if dataset_properties is None:
        dataset_properties = {}

    # Compile a list of legal preprocessors for this problem
    available_embedding = self.get_available_components(
        dataset_properties=dataset_properties,
        include=include,
        exclude=exclude)

    if len(available_embedding) == 0 and 'tabular' in str(
            dataset_properties['task_type']):
        raise ValueError("No embedding found")
    # Bug fix: this used to read `available_embedding == 0`, comparing a dict
    # to an int — always False — so the early exit was dead code. Non-tabular
    # tasks without embeddings now correctly get an empty search space.
    if len(available_embedding) == 0:
        return cs

    if default is None:
        defaults = [
            'NoEmbedding',
            'LearnedEntityEmbedding',
        ]
        for default_ in defaults:
            if default_ in available_embedding:
                default = default_
                break

    # Guard against non-list values (e.g. unset) for categorical_columns.
    categorical_columns = dataset_properties['categorical_columns'] \
        if isinstance(dataset_properties['categorical_columns'], List) else []

    # A '__choice__' update (if registered) constrains the candidate set.
    updates = self._get_search_space_updates()
    if '__choice__' in updates.keys():
        choice_hyperparameter = updates['__choice__']
        if not set(choice_hyperparameter.value_range).issubset(
                available_embedding):
            raise ValueError("Expected given update for {} to have "
                             "choices in {} got {}".format(
                                 self.__class__.__name__,
                                 available_embedding,
                                 choice_hyperparameter.value_range))
        if len(categorical_columns) == 0:
            # Without categorical columns only NoEmbedding makes sense.
            assert len(choice_hyperparameter.value_range) == 1
            if 'NoEmbedding' not in choice_hyperparameter.value_range:
                raise ValueError(
                    "Provided {} in choices, however, the dataset "
                    "is incompatible with it".format(
                        choice_hyperparameter.value_range))
        embedding = CSH.CategoricalHyperparameter(
            '__choice__',
            choice_hyperparameter.value_range,
            default_value=choice_hyperparameter.default_value)
    else:
        if len(categorical_columns) == 0:
            default = 'NoEmbedding'
            if include is not None and default not in include:
                raise ValueError(
                    "Provided {} in include, however, the dataset "
                    "is incompatible with it".format(include))
            embedding = CSH.CategoricalHyperparameter(
                '__choice__', ['NoEmbedding'], default_value=default)
        else:
            embedding = CSH.CategoricalHyperparameter(
                '__choice__',
                list(available_embedding.keys()),
                default_value=default)

    cs.add_hyperparameter(embedding)
    for name in embedding.choices:
        # Per-component updates are forwarded into each sub-space.
        updates = self._get_search_space_updates(prefix=name)
        config_space = available_embedding[
            name].get_hyperparameter_search_space(
            dataset_properties,  # type: ignore
            **updates)
        parent_hyperparameter = {'parent': embedding, 'value': name}
        cs.add_configuration_space(
            name,
            config_space,
            parent_hyperparameter=parent_hyperparameter)

    self.configuration_space_ = cs
    self.dataset_properties_ = dataset_properties
    return cs
def get_hyperparameter_search_space(
    self,
    dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None,
    default: Optional[str] = None,
    include: Optional[List[str]] = None,
    exclude: Optional[List[str]] = None,
) -> ConfigurationSpace:
    """Returns the configuration space of the current chosen components

    Args:
        dataset_properties (Optional[Dict[str, BaseDatasetPropertiesType]]):
            Describes the dataset to work on
        default (Optional[str]): Default head to use
        include: Optional[Dict[str, Any]]: what components to include. It is an
            exhaustive list, and will exclusively use this components.
        exclude: Optional[Dict[str, Any]]: which components to skip

    Returns:
        ConfigurationSpace: the configuration space of the hyper-parameters of the
            chosen component
    """
    cs = ConfigurationSpace()
    if dataset_properties is None:
        dataset_properties = {}

    # Compile a list of legal preprocessors for this problem
    available_heads = self.get_available_components(
        dataset_properties=dataset_properties,
        include=include,
        exclude=exclude)

    if len(available_heads) == 0:
        raise ValueError("No head found")

    if default is None:
        # First available candidate from the preference order wins.
        defaults = [
            'FullyConnectedHead',
            'FullyConvolutional2DHead',
        ]
        for default_ in defaults:
            if default_ in available_heads:
                default = default_
                break

    # A '__choice__' update (if registered) constrains the candidate set;
    # it must be a subset of what is actually available.
    updates = self._get_search_space_updates()
    if '__choice__' in updates.keys():
        choice_hyperparameter = updates['__choice__']
        if not set(choice_hyperparameter.value_range).issubset(
                available_heads):
            raise ValueError("Expected given update for {} to have "
                             "choices in {} got {}".format(
                                 self.__class__.__name__,
                                 available_heads,
                                 choice_hyperparameter.value_range))
        head = CSH.CategoricalHyperparameter(
            '__choice__',
            choice_hyperparameter.value_range,
            default_value=choice_hyperparameter.default_value)
    else:
        head = CSH.CategoricalHyperparameter('__choice__',
                                             list(available_heads.keys()),
                                             default_value=default)
    cs.add_hyperparameter(head)
    for name in head.choices:
        # Per-component updates are forwarded into each sub-space.
        updates = self._get_search_space_updates(prefix=name)
        config_space = available_heads[
            name].get_hyperparameter_search_space(
            dataset_properties,  # type: ignore
            **updates)
        parent_hyperparameter = {'parent': head, 'value': name}
        cs.add_configuration_space(
            name,
            config_space,
            parent_hyperparameter=parent_hyperparameter)

    # Cache the built space (trailing underscore: derived attribute).
    self.configuration_space_ = cs
    self.dataset_properties_ = dataset_properties
    return cs
def get_hyperparameter_search_space(
    self,
    dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None,
    default: Optional[str] = None,
    include: Optional[List[str]] = None,
    exclude: Optional[List[str]] = None) -> ConfigurationSpace:
    """Build the search space over the available feature preprocessors.

    A ``__choice__`` hyperparameter selects the preprocessor. Datasets with
    no numerical columns are restricted to ``NoFeaturePreprocessor``, and
    ``TruncatedSVD`` is removed when only one numerical column exists (it
    requires ``n_features > n_components``).

    Args:
        dataset_properties: Dataset description; merged over
            ``self.dataset_properties`` (caller-supplied keys win).
        default: Preprocessor used as default choice; auto-picked if None.
        include: Exhaustive whitelist of preprocessor names.
        exclude: Preprocessor names to skip.

    Returns:
        ConfigurationSpace with the preprocessor choice and its sub-spaces.
    """
    cs = ConfigurationSpace()
    if dataset_properties is None:
        dataset_properties = dict()
    dataset_properties = {**self.dataset_properties, **dataset_properties}

    available_ = self.get_available_components(
        dataset_properties=dataset_properties,
        include=include,
        exclude=exclude)

    if len(available_) == 0:
        raise ValueError(
            "no feature preprocessors found, please add a feature preprocessor"
        )

    if default is None:
        # Preference order; skip candidates ruled out by include/exclude.
        defaults = [
            'NoFeaturePreprocessor',
            'FastICA',
            'KernelPCA',
            'RandomKitchenSinks',
            'Nystroem',
            'PolynomialFeatures',
            'PowerTransformer',
            'TruncatedSVD',
        ]
        for default_ in defaults:
            if default_ in available_:
                if include is not None and default_ not in include:
                    continue
                if exclude is not None and default_ in exclude:
                    continue
                default = default_
                break

    # Guard against non-list values (e.g. unset) for numerical_columns.
    numerical_columns = dataset_properties['numerical_columns'] \
        if isinstance(dataset_properties['numerical_columns'], List) else []

    # A '__choice__' update (if registered) constrains the candidate set.
    updates = self._get_search_space_updates()
    if '__choice__' in updates.keys():
        choice_hyperparameter = updates['__choice__']
        if not set(choice_hyperparameter.value_range).issubset(available_):
            raise ValueError("Expected given update for {} to have "
                             "choices in {} got {}".format(
                                 self.__class__.__name__,
                                 available_,
                                 choice_hyperparameter.value_range))
        if len(numerical_columns) == 0:
            # Without numerical columns only NoFeaturePreprocessor makes sense.
            assert len(choice_hyperparameter.value_range) == 1
            assert 'NoFeaturePreprocessor' in choice_hyperparameter.value_range, \
                "Provided {} in choices, however, the dataset " \
                "is incompatible with it".format(choice_hyperparameter.value_range)

        preprocessor = CSH.CategoricalHyperparameter(
            '__choice__',
            choice_hyperparameter.value_range,
            default_value=choice_hyperparameter.default_value)
    else:
        # add only no feature preprocessor to choice hyperparameters in case the dataset is only categorical
        if len(numerical_columns) == 0:
            default = 'NoFeaturePreprocessor'
            if include is not None and default not in include:
                raise ValueError(
                    "Provided {} in include, however, "
                    "the dataset is incompatible with it".format(include))
            preprocessor = CSH.CategoricalHyperparameter(
                '__choice__', ['NoFeaturePreprocessor'],
                default_value=default)
        else:
            # Truncated SVD requires n_features > n_components
            if len(numerical_columns) == 1:
                del available_['TruncatedSVD']
            preprocessor = CSH.CategoricalHyperparameter(
                '__choice__', list(available_.keys()),
                default_value=default)

    cs.add_hyperparameter(preprocessor)

    # add only child hyperparameters of preprocessor choices
    for name in preprocessor.choices:
        # Per-component updates are forwarded into each sub-space.
        updates = self._get_search_space_updates(prefix=name)
        config_space = available_[name].get_hyperparameter_search_space(
            dataset_properties,  # type:ignore
            **updates)
        parent_hyperparameter = {'parent': preprocessor, 'value': name}
        cs.add_configuration_space(
            name,
            config_space,
            parent_hyperparameter=parent_hyperparameter)

    # Cache the result for later inspection by the pipeline.
    self.configuration_space = cs
    self.dataset_properties = dataset_properties
    return cs
# Reproduction script: checks whether CategoricalHyperparameter `weights`
# (exposed as `.probabilities`) survive deepcopy/copy/pickle round-trips and
# being mounted into a parent ConfigurationSpace via add_configuration_space.
# Each print shows the round-tripped probabilities next to the original
# weights so discrepancies are visible at a glance.

from copy import copy, deepcopy
from pickle import dumps, loads

from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter

weights = [0.25, 0.5, 0.25]
hp = CategoricalHyperparameter("B", ["1", "2", "3"], weights=weights)
sub_cs = ConfigurationSpace()
sub_cs.add_hyperparameter(hp)

# Mounting sub_cs under prefix "A" renames "B" to "A:B" in the parent space.
cs = ConfigurationSpace()
cs.add_configuration_space("A", sub_cs)

# Round-trips of the standalone sub-space.
print(deepcopy(sub_cs).get_hyperparameter("B").probabilities, weights)
print(copy(sub_cs).get_hyperparameter("B").probabilities, weights)
print(loads(dumps(sub_cs)).get_hyperparameter("B").probabilities, weights)

# The mounted hyperparameter, direct and after round-trips of the parent.
print(cs.get_hyperparameter("A:B").probabilities, weights)
print(deepcopy(cs).get_hyperparameter("A:B").probabilities, weights)
print(copy(cs).get_hyperparameter("A:B").probabilities, weights)
print(loads(dumps(cs)).get_hyperparameter("A:B").probabilities, weights)
def get_hyperparameter_search_space(
        self,
        dataset_properties: Optional[Dict[
            str, BaseDatasetPropertiesType]] = None,
        default: Optional[str] = None,
        include: Optional[List[str]] = None,
        exclude: Optional[List[str]] = None) -> ConfigurationSpace:
    """Build the configuration space for the scaler choice.

    A ``__choice__`` categorical hyperparameter selects one of the
    available scalers, and each scaler's own sub-space is attached as a
    conditional child of that choice.

    Args:
        dataset_properties: Properties of the dataset; merged on top of
            ``self.dataset_properties``. Only the ``'numerical_columns'``
            entry is inspected here.
        default: Preferred default scaler name; if None, the first match
            from a fixed preference list is used.
        include: If given, restrict the components to this list.
        exclude: If given, skip these components.

    Returns:
        ConfigurationSpace: the assembled space (also stored on
        ``self.configuration_space``).

    Raises:
        ValueError: if no scaler is available, or a search-space update
            requests choices that are unavailable or incompatible with a
            dataset that has no numerical columns.
    """
    cs = ConfigurationSpace()
    if dataset_properties is None:
        dataset_properties = dict()
    # Caller-supplied properties override the instance-level ones.
    dataset_properties = {**self.dataset_properties, **dataset_properties}

    available_scalers = self.get_available_components(
        dataset_properties=dataset_properties,
        include=include,
        exclude=exclude)

    if len(available_scalers) == 0:
        raise ValueError("no scalers found, please add a scaler")

    # Pick the first preferred scaler that is actually available.
    if default is None:
        defaults = [
            'StandardScaler',
            'Normalizer',
            'MinMaxScaler',
            'NoScaler'
        ]
        for default_ in defaults:
            if default_ in available_scalers:
                default = default_
                break

    # NOTE(review): isinstance against typing.List works but is a
    # deprecated pattern; treats a non-list value as "no numerical columns".
    numerical_columns = dataset_properties['numerical_columns']\
        if isinstance(dataset_properties['numerical_columns'], List) else []

    updates = self._get_search_space_updates()
    if '__choice__' in updates.keys():
        # An explicit update pins the choice hyperparameter; validate it
        # against the available components before using it verbatim.
        choice_hyperparameter = updates['__choice__']
        if not set(choice_hyperparameter.value_range).issubset(
                available_scalers):
            raise ValueError("Expected given update for {} to have "
                             "choices in {} got {}".format(
                                 self.__class__.__name__,
                                 available_scalers,
                                 choice_hyperparameter.value_range))
        if len(numerical_columns) == 0:
            # With no numerical columns only NoScaler makes sense, so the
            # update must request exactly that single choice.
            assert len(choice_hyperparameter.value_range) == 1
            if 'NoScaler' not in choice_hyperparameter.value_range:
                raise ValueError(
                    "Provided {} in choices, however, the dataset "
                    "is incompatible with it".format(
                        choice_hyperparameter.value_range))
        preprocessor = CSH.CategoricalHyperparameter(
            '__choice__',
            choice_hyperparameter.value_range,
            default_value=choice_hyperparameter.default_value)
    else:
        # add only no scaler to choice hyperparameters in case the dataset
        # is only categorical
        if len(numerical_columns) == 0:
            default = 'NoScaler'
            if include is not None and default not in include:
                raise ValueError(
                    "Provided {} in include, however, "
                    "the dataset is incompatible with it".format(include))
            preprocessor = CSH.CategoricalHyperparameter(
                '__choice__', ['NoScaler'], default_value=default)
        else:
            preprocessor = CSH.CategoricalHyperparameter(
                '__choice__',
                list(available_scalers.keys()),
                default_value=default)

    cs.add_hyperparameter(preprocessor)

    # add only child hyperparameters of preprocessor choices
    for name in preprocessor.choices:
        # Per-component updates use the component name as prefix.
        updates = self._get_search_space_updates(prefix=name)
        config_space = available_scalers[
            name].get_hyperparameter_search_space(
                dataset_properties,  # type:ignore
                **updates)
        parent_hyperparameter = {'parent': preprocessor, 'value': name}
        cs.add_configuration_space(
            name,
            config_space,
            parent_hyperparameter=parent_hyperparameter)

    self.configuration_space = cs
    self.dataset_properties = dataset_properties
    return cs
def recursion(self, hdl: Dict, path=()) -> ConfigurationSpace: cs = ConfigurationSpace() # 检测一下这个dict是否在直接描述超参 key_list = list(hdl.keys()) if len(key_list) == 0: cs.add_hyperparameter(Constant("placeholder", "placeholder")) return cs else: sample_key = key_list[0] sample_value = hdl[sample_key] if is_hdl_bottom(sample_key, sample_value): store = {} conditions_dict = {} for key, value in hdl.items(): if key.startswith("__"): conditions_dict[key] = value else: # assert isinstance(value, dict) # fixme : 可以对常量进行编码 hp = self.__parse_dict_to_config(key, value) # hp.name = key cs.add_hyperparameter(hp) store[key] = hp for key, value in conditions_dict.items(): if key == "__condition": assert isinstance(value, list) for item in value: cond = self.__condition(item, store) cs.add_condition(cond) elif key == "__activate": self.__activate(value, store, cs) elif key == "__forbidden": self.__forbidden(value, store, cs) elif key == "__rely_model": RelyModels.info.append([value, deepcopy(path)]) return cs pattern = re.compile(r"(.*)\((.*)\)") for key, value in hdl.items(): mat = pattern.match(key) if mat: groups = mat.groups() assert len(groups) == 2 prefix_name, method = groups value_list = list(value.keys()) assert len(value_list) >= 1 if method == "choice": pass else: raise NotImplementedError() cur_cs = ConfigurationSpace() assert isinstance(value, dict) # 不能用constant,会报错 choice2proba = {} not_specific_proba_choices = [] sum_proba = 0 for k in value_list: v = value[k] if isinstance(v, dict) and "__proba" in v: proba = v.pop("__proba") choice2proba[k] = proba sum_proba += proba else: not_specific_proba_choices.append(k) if sum_proba <= 1: if len(not_specific_proba_choices) > 0: p_rest = (1 - sum_proba) / len(not_specific_proba_choices) for not_specific_proba_choice in not_specific_proba_choices: choice2proba[not_specific_proba_choice] = p_rest else: choice2proba = {k: 1 / len(value_list) for k in value_list} proba_list = [choice2proba[k] for k in value_list] value_list = 
list(map(smac_hdl._encode, value_list)) # choices must be str option_param = CategoricalHyperparameter( '__choice__', value_list, weights=proba_list) # todo : default cur_cs.add_hyperparameter(option_param) for sub_key, sub_value in value.items(): assert isinstance(sub_value, dict) sub_cs = self.recursion(sub_value, path=list(path) + [prefix_name, sub_key]) parent_hyperparameter = { 'parent': option_param, 'value': sub_key } cur_cs.add_configuration_space( sub_key, sub_cs, parent_hyperparameter=parent_hyperparameter) cs.add_configuration_space(prefix_name, cur_cs) elif isinstance(value, dict): sub_cs = self.recursion(value, path=list(path) + [key]) cs.add_configuration_space(key, sub_cs) else: raise NotImplementedError() return cs
def get_hyperspace(data_info,
                   include_estimators=None,
                   include_preprocessors=None):
    """Construct the full pipeline configuration space for a dataset.

    Builds one sub-space per pipeline step, then adds forbidden clauses
    for illegal step combinations (sparse-input components after a
    densifier, non-linear models with feature-learning preprocessors,
    multinomial NB with preprocessors that emit negative values).

    Args:
        data_info: Dict describing the dataset (at least a 'task' entry);
            missing 'is_sparse'/'signed' flags default to False.
        include_estimators: Optional list restricting the estimator choices.
        include_preprocessors: Optional list restricting the preprocessors.

    Returns:
        ConfigurationSpace: the assembled pipeline space.

    Raises:
        NotImplementedError: for unsupported task types.
        ValueError: for invalid include/exclude keys or when no legal
            default configuration can be found.
    """
    if data_info is None or not isinstance(data_info, dict):
        data_info = dict()

    if 'is_sparse' not in data_info:
        # This dataset is probably dense
        data_info['is_sparse'] = False

    task_type = data_info['task']
    multilabel = (task_type == MULTILABEL_CLASSIFICATION)
    multiclass = (task_type == MULTICLASS_CLASSIFICATION)

    if task_type in CLASSIFICATION_TASKS:
        data_info['multilabel'] = multilabel
        data_info['multiclass'] = multiclass
        data_info['target_type'] = 'classification'
        pipe_type = 'classifier'
        # Components matched for the forbidden-combination clauses below.
        # (The original list contained "decision_tree" twice.)
        components_ = ["adaboost", "decision_tree", "extra_trees",
                       "gradient_boosting", "k_nearest_neighbors",
                       "libsvm_svc", "random_forest", "gaussian_nb"]
        feature_learning_ = ["kitchen_sinks", "nystroem_sampler"]
    elif task_type in REGRESSION_TASKS:
        data_info['target_type'] = 'regression'
        pipe_type = 'regressor'
        # Components matched for the forbidden-combination clauses below.
        components_ = ["adaboost", "decision_tree", "extra_trees",
                       "gaussian_process", "gradient_boosting",
                       "k_nearest_neighbors", "random_forest"]
        feature_learning_ = ["kitchen_sinks", "kernel_pca",
                            "nystroem_sampler"]
    else:
        raise NotImplementedError()

    include, exclude = dict(), dict()
    if include_preprocessors is not None:
        include["preprocessor"] = include_preprocessors
    if include_estimators is not None:
        include[pipe_type] = include_estimators

    cs = ConfigurationSpace()

    # Construct pipeline
    # FIXME OrderedDIct?
    pipeline = get_pipeline(data_info['task'])

    # Validate include/exclude keys against the pipeline step names.
    keys = [pair[0] for pair in pipeline]
    for key in include:
        if key not in keys:
            raise ValueError('Invalid key in include: %s; should be one '
                             'of %s' % (key, keys))
    for key in exclude:
        if key not in keys:
            raise ValueError('Invalid key in exclude: %s; should be one '
                             'of %s' % (key, keys))

    # Construct hyperspace
    # TODO What's the 'signed' stands for?
    if 'signed' not in data_info:
        # This dataset probably contains unsigned data
        data_info['signed'] = False

    match = check_pipeline(pipeline, data_info,
                           include=include, exclude=exclude)
    # Now we have only legal combinations at this step of the pipeline
    # Simple sanity checks
    assert np.sum(match) != 0, "No valid pipeline found."
    assert np.sum(match) <= np.size(match), \
        "'matches' is not binary; %s <= %d, %s" % \
        (str(np.sum(match)), np.size(match), str(match.shape))

    # Iterate each dimension of the matches array (each step of the
    # pipeline) to see if we can add a hyperparameter for that step
    for node_idx, n_ in enumerate(pipeline):
        node_name, node = n_
        is_choice = hasattr(node, "get_available_components")

        # if the node isn't a choice we can add it immediately because it
        # must be active (if it weren't, np.sum(matches) would be zero)
        if not is_choice:
            cs.add_configuration_space(
                node_name,
                node.get_hyperparameter_search_space(data_info))
        # If the node IS a choice, we have to figure out which of its
        # choices are actually legal choices
        else:
            choices_list = find_active_choices(
                match, node, node_idx, data_info,
                include=include.get(node_name),
                exclude=exclude.get(node_name))
            cs.add_configuration_space(
                node_name,
                node.get_hyperparameter_search_space(
                    data_info, include=choices_list))

    # And now add forbidden parameter configurations
    # According to matches
    if np.sum(match) < np.size(match):
        cs = add_forbidden(conf_space=cs, pipeline=pipeline, matches=match,
                           dataset_properties=data_info,
                           include=include, exclude=exclude)

    components = cs.get_hyperparameter('%s:__choice__' % pipe_type).choices
    availables = pipeline[-1][1].get_available_components(data_info)
    preprocessors = cs.get_hyperparameter('preprocessor:__choice__').choices
    #available_preprocessors = pipeline[-2][1].get_available_components(data_info)

    # Fallback defaults to try when a forbidden clause conflicts with the
    # current default configuration.
    possible_default = copy.copy(list(availables.keys()))
    default = cs.get_hyperparameter('%s:__choice__' % pipe_type).default
    possible_default.remove(default)

    # A classifier which can handle sparse data after the densifier is
    # forbidden for memory issues
    for key in components:
        # TODO regression dataset_properties=None
        if SPARSE in availables[key].get_properties()['input']:
            if 'densifier' in preprocessors:
                while True:
                    try:
                        cs.add_forbidden_clause(
                            ForbiddenAndConjunction(
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(
                                        '%s:__choice__' % pipe_type), key),
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(
                                        'preprocessor:__choice__'),
                                    'densifier')
                            ))
                        # Success
                        break
                    except ValueError:
                        # Change the default and try again
                        try:
                            default = possible_default.pop()
                        except IndexError:
                            raise ValueError(
                                "Cannot find a legal default configuration.")
                        cs.get_hyperparameter(
                            '%s:__choice__' % pipe_type).default = default

    # Forbid combinations of non-linear models with feature-learning
    # preprocessors, which would take too long.
    for c, f in itertools.product(components_, feature_learning_):
        if c not in components:
            continue
        if f not in preprocessors:
            continue
        while True:
            try:
                cs.add_forbidden_clause(ForbiddenAndConjunction(
                    ForbiddenEqualsClause(cs.get_hyperparameter(
                        "%s:__choice__" % pipe_type), c),
                    ForbiddenEqualsClause(cs.get_hyperparameter(
                        "preprocessor:__choice__"), f)))
                break
            except KeyError:
                break
            except ValueError:
                # Change the default and try again
                try:
                    default = possible_default.pop()
                except IndexError:
                    raise ValueError(
                        "Cannot find a legal default configuration.")
                cs.get_hyperparameter(
                    '%s:__choice__' % pipe_type).default = default

    if task_type in CLASSIFICATION_TASKS:
        # Multinomial NB etc. won't work with preprocessors that can emit
        # negative feature values (feature learning, PCA, etc.).
        components_ = ["multinomial_nb"]
        preproc_with_negative_X = ["kitchen_sinks", "pca", "truncatedSVD",
                                   "fast_ica", "kernel_pca",
                                   "nystroem_sampler"]
        for c, f in itertools.product(components_, preproc_with_negative_X):
            if c not in components:
                continue
            if f not in preprocessors:
                continue
            while True:
                try:
                    cs.add_forbidden_clause(ForbiddenAndConjunction(
                        ForbiddenEqualsClause(cs.get_hyperparameter(
                            "preprocessor:__choice__"), f),
                        ForbiddenEqualsClause(cs.get_hyperparameter(
                            "classifier:__choice__"), c)))
                    break
                except KeyError:
                    break
                except ValueError:
                    # Change the default and try again
                    try:
                        default = possible_default.pop()
                    except IndexError:
                        raise ValueError(
                            "Cannot find a legal default configuration.")
                    cs.get_hyperparameter(
                        'classifier:__choice__').default = default

    return cs