def test_get_hyperparameter(self):
    """get_hyperparameter returns the stored object for a known name and
    raises KeyError for an unknown one."""
    space = ConfigurationSpace()
    parent_hp = CategoricalHyperparameter("parent", [0, 1])
    child_hp = UniformIntegerHyperparameter("child", 0, 10)
    space.add_hyperparameter(parent_hp)
    space.add_hyperparameter(child_hp)

    # Lookup by name must hand back the very hyperparameters added above.
    self.assertEqual(parent_hp, space.get_hyperparameter("parent"))
    self.assertEqual(child_hp, space.get_hyperparameter("child"))

    # A name that was never added must raise KeyError.
    self.assertRaises(KeyError, space.get_hyperparameter, "grandfather")
def get_hyperparameter_search_space(dataset_properties=None):
    """Return the ConfigurationSpace for a deep feed-forward network.

    Builds training hyperparameters (batch size, epochs, learning rate,
    L2 weight, output dropout), solver- and learning-rate-policy
    parameters, and per-layer parameters for up to ``max_num_layers - 1``
    hidden layers, together with the conditions that activate each
    parameter only when its layer / solver / policy is actually selected.

    Parameters
    ----------
    dataset_properties : dict, optional
        Unused here; kept so the signature matches the other
        ``get_hyperparameter_search_space`` implementations.

    Returns
    -------
    ConfigurationSpace
        The fully conditioned search space.
    """
    max_num_layers = 7  # Maximum number of layers coded

    # Hacky way to condition layers params based on the number of layers
    # 'c'=1, 'd'=2, 'e'=3 ,'f'=4', g ='5', h='6' + output_layer
    layer_choices = [
        chr(i) for i in range(ord('c'), ord('b') + max_num_layers)
    ]

    batch_size = UniformIntegerHyperparameter("batch_size", 32, 4096,
                                              log=True, default=32)
    number_epochs = UniformIntegerHyperparameter("number_epochs", 2, 80,
                                                 default=5)
    num_layers = CategoricalHyperparameter("num_layers",
                                           choices=layer_choices,
                                           default='c')
    lr = UniformFloatHyperparameter("learning_rate", 1e-6, 1.0, log=True,
                                    default=0.01)
    l2 = UniformFloatHyperparameter("lambda2", 1e-7, 1e-2, log=True,
                                    default=1e-4)
    dropout_output = UniformFloatHyperparameter("dropout_output", 0.0, 0.99,
                                                default=0.5)

    # Define basic hyperparameters and define the config space
    # basic means that are independent from the number of layers
    cs = ConfigurationSpace()
    cs.add_hyperparameter(number_epochs)
    cs.add_hyperparameter(batch_size)
    cs.add_hyperparameter(num_layers)
    cs.add_hyperparameter(lr)
    cs.add_hyperparameter(l2)
    cs.add_hyperparameter(dropout_output)

    # Define parameters with different child parameters and conditions
    solver_choices = [
        "adam", "adadelta", "adagrad", "sgd", "momentum", "nesterov",
        "smorm3s"
    ]
    solver = CategoricalHyperparameter(name="solver",
                                       choices=solver_choices,
                                       default="smorm3s")
    beta1 = UniformFloatHyperparameter("beta1", 1e-4, 0.1, log=True,
                                       default=0.1)
    beta2 = UniformFloatHyperparameter("beta2", 1e-4, 0.1, log=True,
                                       default=0.01)
    rho = UniformFloatHyperparameter("rho", 0.05, 0.99, log=True,
                                     default=0.95)
    momentum = UniformFloatHyperparameter("momentum", 0.3, 0.999,
                                          default=0.9)

    # TODO: Add policy based on this sklearn sgd
    policy_choices = ['fixed', 'inv', 'exp', 'step']
    lr_policy = CategoricalHyperparameter(name="lr_policy",
                                          choices=policy_choices,
                                          default='fixed')
    gamma = UniformFloatHyperparameter(name="gamma", lower=1e-3, upper=1e-1,
                                       default=1e-2)
    power = UniformFloatHyperparameter("power", 0.0, 1.0, default=0.5)
    epoch_step = UniformIntegerHyperparameter("epoch_step", 2, 20,
                                              default=5)

    cs.add_hyperparameter(solver)
    cs.add_hyperparameter(beta1)
    cs.add_hyperparameter(beta2)
    cs.add_hyperparameter(momentum)
    cs.add_hyperparameter(rho)
    cs.add_hyperparameter(lr_policy)
    cs.add_hyperparameter(gamma)
    cs.add_hyperparameter(power)
    cs.add_hyperparameter(epoch_step)

    # Define parameters that are needed it for each layer
    # NOTE(review): output_activation_choices is defined but never used in
    # this function — presumably intended for an output-layer parameter.
    output_activation_choices = ['softmax', 'sigmoid', 'softplus', 'tanh']
    activations_choices = [
        'sigmoid', 'tanh', 'scaledTanh', 'elu', 'relu', 'leaky', 'linear'
    ]
    weight_choices = [
        'constant', 'normal', 'uniform', 'glorot_normal', 'glorot_uniform',
        'he_normal', 'he_uniform', 'ortogonal', 'sparse'
    ]

    # Iterate over parameters that are used in each layer
    for i in range(1, max_num_layers):
        layer_units = UniformIntegerHyperparameter(
            "num_units_layer_" + str(i), 64, 4096, log=True, default=128)
        cs.add_hyperparameter(layer_units)
        layer_dropout = UniformFloatHyperparameter(
            "dropout_layer_" + str(i), 0.0, 0.99, default=0.5)
        cs.add_hyperparameter(layer_dropout)
        weight_initialization = CategoricalHyperparameter(
            'weight_init_' + str(i), choices=weight_choices,
            default='he_normal')
        cs.add_hyperparameter(weight_initialization)
        layer_std = UniformFloatHyperparameter(
            "std_layer_" + str(i), 1e-6, 0.1, log=True, default=0.005)
        cs.add_hyperparameter(layer_std)
        layer_activation = CategoricalHyperparameter(
            "activation_layer_" + str(i), choices=activations_choices,
            default="relu")
        cs.add_hyperparameter(layer_activation)
        layer_leakiness = UniformFloatHyperparameter(
            'leakiness_layer_' + str(i), 0.01, 0.99, default=0.3)
        cs.add_hyperparameter(layer_leakiness)
        layer_tanh_alpha = UniformFloatHyperparameter(
            'tanh_alpha_layer_' + str(i), 0.5, 1.0, default=2. / 3.)
        cs.add_hyperparameter(layer_tanh_alpha)
        layer_tanh_beta = UniformFloatHyperparameter(
            'tanh_beta_layer_' + str(i), 1.1, 3.0, log=True,
            default=1.7159)
        cs.add_hyperparameter(layer_tanh_beta)

    # Layer 1 (i == 1) is always present, so only layers >= 2 are
    # conditioned on the num_layers choice below.
    # TODO: Could be in a function in a new module
    for i in range(2, max_num_layers):
        # Condition layers parameter on layer choice
        layer_unit_param = cs.get_hyperparameter("num_units_layer_" + str(i))
        layer_cond = InCondition(child=layer_unit_param, parent=num_layers,
                                 values=[l for l in layer_choices[i - 1:]])
        cs.add_condition(layer_cond)

        # Condition dropout parameter on layer choice
        layer_dropout_param = cs.get_hyperparameter("dropout_layer_" + str(i))
        layer_cond = InCondition(child=layer_dropout_param, parent=num_layers,
                                 values=[l for l in layer_choices[i - 1:]])
        cs.add_condition(layer_cond)

        # Condition weight initialization on layer choice
        layer_weight_param = cs.get_hyperparameter("weight_init_" + str(i))
        layer_cond = InCondition(child=layer_weight_param, parent=num_layers,
                                 values=[l for l in layer_choices[i - 1:]])
        cs.add_condition(layer_cond)

        # Condition std parameter on weight layer initialization choice
        layer_std_param = cs.get_hyperparameter("std_layer_" + str(i))
        weight_cond = EqualsCondition(child=layer_std_param,
                                      parent=layer_weight_param,
                                      value='normal')
        cs.add_condition(weight_cond)

        # Condition activation parameter on layer choice
        layer_activation_param = cs.get_hyperparameter(
            "activation_layer_" + str(i))
        layer_cond = InCondition(child=layer_activation_param,
                                 parent=num_layers,
                                 values=[l for l in layer_choices[i - 1:]])
        cs.add_condition(layer_cond)

        # Condition leakiness on activation choice
        layer_leakiness_param = cs.get_hyperparameter(
            "leakiness_layer_" + str(i))
        activation_cond = EqualsCondition(child=layer_leakiness_param,
                                          parent=layer_activation_param,
                                          value='leaky')
        cs.add_condition(activation_cond)

        # Condition tanh on activation choice
        layer_tanh_alpha_param = cs.get_hyperparameter(
            "tanh_alpha_layer_" + str(i))
        activation_cond = EqualsCondition(child=layer_tanh_alpha_param,
                                          parent=layer_activation_param,
                                          value='scaledTanh')
        cs.add_condition(activation_cond)
        layer_tanh_beta_param = cs.get_hyperparameter(
            "tanh_beta_layer_" + str(i))
        activation_cond = EqualsCondition(child=layer_tanh_beta_param,
                                          parent=layer_activation_param,
                                          value='scaledTanh')
        cs.add_condition(activation_cond)

    # Conditioning on solver
    momentum_depends_on_solver = InCondition(
        momentum, solver, values=["momentum", "nesterov"])
    beta1_depends_on_solver = EqualsCondition(beta1, solver, "adam")
    beta2_depends_on_solver = EqualsCondition(beta2, solver, "adam")
    rho_depends_on_solver = EqualsCondition(rho, solver, "adadelta")
    cs.add_condition(momentum_depends_on_solver)
    cs.add_condition(beta1_depends_on_solver)
    cs.add_condition(beta2_depends_on_solver)
    cs.add_condition(rho_depends_on_solver)

    # Conditioning on learning rate policy
    lr_policy_depends_on_solver = InCondition(
        lr_policy, solver,
        ["adadelta", "adagrad", "sgd", "momentum", "nesterov"])
    gamma_depends_on_policy = InCondition(child=gamma, parent=lr_policy,
                                          values=["inv", "exp", "step"])
    power_depends_on_policy = EqualsCondition(power, lr_policy, "inv")
    epoch_step_depends_on_policy = EqualsCondition(epoch_step, lr_policy,
                                                   "step")
    cs.add_condition(lr_policy_depends_on_solver)
    cs.add_condition(gamma_depends_on_policy)
    cs.add_condition(power_depends_on_policy)
    cs.add_condition(epoch_step_depends_on_policy)
    return cs
def get_hyperparameter_search_space(cls, include=None, exclude=None,
                                    dataset_properties=None):
    """Return the configuration space for the regression CASH problem.

    Parameters
    ----------
    include : dict, optional
        If given, restricts the components considered per pipeline step.
        Cannot be used together with ``exclude``.
    exclude : dict, optional
        If given, removes the listed components per pipeline step.
        Cannot be used together with ``include``.
    dataset_properties : dict, optional
        Properties of the dataset; ``target_type`` is forced to
        ``'regression'`` and ``sparse`` defaults to ``False``.

    Returns
    -------
    cs : ConfigurationSpace
        The configuration space describing the simple regression
        pipeline, with forbidden clauses for illegal
        regressor/preprocessor combinations.
    """
    cs = ConfigurationSpace()

    # Normalise dataset_properties and force the regression target type.
    if dataset_properties is None or not isinstance(dataset_properties,
                                                    dict):
        dataset_properties = dict()
    if not 'target_type' in dataset_properties:
        dataset_properties['target_type'] = 'regression'
    if dataset_properties['target_type'] != 'regression':
        dataset_properties['target_type'] = 'regression'
    if 'sparse' not in dataset_properties:
        # This dataset is probaby dense
        dataset_properties['sparse'] = False

    pipeline = cls._get_pipeline()
    cs = cls._get_hyperparameter_search_space(cs, dataset_properties,
                                              exclude, include, pipeline)

    regressors = cs.get_hyperparameter('regressor:__choice__').choices
    preprocessors = cs.get_hyperparameter('preprocessor:__choice__').choices
    # Pipeline layout: last step is the regressor choice, second-to-last
    # the preprocessor choice.
    available_regressors = pipeline[-1][1].get_available_components(
        dataset_properties)
    # NOTE(review): available_preprocessors is computed but unused below.
    available_preprocessors = pipeline[-2][1].get_available_components(
        dataset_properties)

    # Candidate fallback defaults, tried in pop() order whenever a
    # forbidden clause would make the current default illegal.
    possible_default_regressor = copy.copy(list(
        available_regressors.keys()))
    default = cs.get_hyperparameter('regressor:__choice__').default
    del possible_default_regressor[
        possible_default_regressor.index(default)]

    # A regressor which can handle sparse data after the densifier
    for key in regressors:
        if SPARSE in available_regressors[key].get_properties(
                dataset_properties=None)['input']:
            if 'densifier' in preprocessors:
                # add_forbidden_clause raises ValueError if the clause
                # forbids the current default; retry with a new default.
                while True:
                    try:
                        cs.add_forbidden_clause(
                            ForbiddenAndConjunction(
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(
                                        'regressor:__choice__'), key),
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(
                                        'preprocessor:__choice__'),
                                    'densifier')
                            ))
                        break
                    except ValueError:
                        # Change the default and try again
                        try:
                            default = possible_default_regressor.pop()
                        except IndexError:
                            raise ValueError(
                                "Cannot find a legal default configuration.")
                        cs.get_hyperparameter(
                            'regressor:__choice__').default = default

    # which would take too long
    # Combinations of tree-based models with feature learning:
    regressors_ = ["adaboost", "decision_tree", "extra_trees",
                   "gaussian_process", "gradient_boosting",
                   "k_nearest_neighbors", "random_forest"]
    feature_learning_ = ["kitchen_sinks", "kernel_pca", "nystroem_sampler"]

    for r, f in product(regressors_, feature_learning_):
        if r not in regressors:
            continue
        if f not in preprocessors:
            continue
        while True:
            try:
                cs.add_forbidden_clause(ForbiddenAndConjunction(
                    ForbiddenEqualsClause(cs.get_hyperparameter(
                        "regressor:__choice__"), r),
                    ForbiddenEqualsClause(cs.get_hyperparameter(
                        "preprocessor:__choice__"), f)))
                break
            except KeyError:
                # Component not present in this space — skip the pair.
                break
            except ValueError:
                # Change the default and try again
                try:
                    default = possible_default_regressor.pop()
                except IndexError:
                    raise ValueError(
                        "Cannot find a legal default configuration.")
                cs.get_hyperparameter(
                    'regressor:__choice__').default = default

    return cs
def get_hyperparameter_search_space(cls, include=None, exclude=None,
                                    dataset_properties=None):
    """Create the hyperparameter configuration space.

    Parameters
    ----------
    include : dict (optional, default=None)
        Restricts the components considered per pipeline step.
    exclude : dict (optional, default=None)
        Removes the listed components per pipeline step.
    dataset_properties : dict (optional, default=None)
        Properties of the dataset; ``target_type`` is forced to
        ``'classification'``.

    Returns
    -------
    cs : ConfigurationSpace
        The configuration space for the classification pipeline, with
        forbidden clauses for illegal classifier/preprocessor pairs.
    """
    cs = ConfigurationSpace()

    # Normalise dataset_properties and force the classification target.
    if dataset_properties is None or not isinstance(dataset_properties,
                                                    dict):
        dataset_properties = dict()
    if not 'target_type' in dataset_properties:
        dataset_properties['target_type'] = 'classification'
    if dataset_properties['target_type'] != 'classification':
        dataset_properties['target_type'] = 'classification'

    pipeline = cls._get_pipeline()
    cs = cls._get_hyperparameter_search_space(cs, dataset_properties,
                                              exclude, include, pipeline)

    classifiers = cs.get_hyperparameter('classifier:__choice__').choices
    preprocessors = cs.get_hyperparameter('preprocessor:__choice__').choices
    # Pipeline layout: last step is the classifier choice, second-to-last
    # the preprocessor choice.
    available_classifiers = pipeline[-1][1].get_available_components(
        dataset_properties)
    # NOTE(review): available_preprocessors is computed but unused below.
    available_preprocessors = pipeline[-2][1].get_available_components(
        dataset_properties)

    # Candidate fallback defaults, tried in pop() order whenever a
    # forbidden clause would make the current default illegal.
    possible_default_classifier = copy.copy(list(
        available_classifiers.keys()))
    default = cs.get_hyperparameter('classifier:__choice__').default
    del possible_default_classifier[
        possible_default_classifier.index(default)]

    # A classifier which can handle sparse data after the densifier is
    # forbidden for memory issues
    for key in classifiers:
        if SPARSE in available_classifiers[key].get_properties()['input']:
            if 'densifier' in preprocessors:
                # add_forbidden_clause raises ValueError if the clause
                # forbids the current default; retry with a new default.
                while True:
                    try:
                        cs.add_forbidden_clause(
                            ForbiddenAndConjunction(
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(
                                        'classifier:__choice__'), key),
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(
                                        'preprocessor:__choice__'),
                                    'densifier')
                            ))
                        # Success
                        break
                    except ValueError:
                        # Change the default and try again
                        try:
                            default = possible_default_classifier.pop()
                        except IndexError:
                            raise ValueError("Cannot find a legal default configuration.")
                        cs.get_hyperparameter(
                            'classifier:__choice__').default = default

    # which would take too long
    # Combinations of non-linear models with feature learning:
    # NOTE(review): "decision_tree" appears twice in this list — harmless
    # (the second pass re-attempts an already-added clause) but redundant.
    classifiers_ = ["adaboost", "decision_tree", "extra_trees",
                    "gradient_boosting", "k_nearest_neighbors",
                    "libsvm_svc", "random_forest", "gaussian_nb",
                    "decision_tree"]
    feature_learning = ["kitchen_sinks", "nystroem_sampler"]

    for c, f in product(classifiers_, feature_learning):
        if c not in classifiers:
            continue
        if f not in preprocessors:
            continue
        while True:
            try:
                cs.add_forbidden_clause(ForbiddenAndConjunction(
                    ForbiddenEqualsClause(cs.get_hyperparameter(
                        "classifier:__choice__"), c),
                    ForbiddenEqualsClause(cs.get_hyperparameter(
                        "preprocessor:__choice__"), f)))
                break
            except KeyError:
                # Component not present in this space — skip the pair.
                break
            except ValueError as e:
                # Change the default and try again
                try:
                    default = possible_default_classifier.pop()
                except IndexError:
                    raise ValueError(
                        "Cannot find a legal default configuration.")
                cs.get_hyperparameter(
                    'classifier:__choice__').default = default

    # Won't work
    # Multinomial NB etc don't use with features learning, pca etc
    classifiers_ = ["multinomial_nb"]
    preproc_with_negative_X = ["kitchen_sinks", "pca", "truncatedSVD",
                               "fast_ica", "kernel_pca", "nystroem_sampler"]
    for c, f in product(classifiers_, preproc_with_negative_X):
        if c not in classifiers:
            continue
        if f not in preprocessors:
            continue
        while True:
            try:
                cs.add_forbidden_clause(ForbiddenAndConjunction(
                    ForbiddenEqualsClause(cs.get_hyperparameter(
                        "preprocessor:__choice__"), f),
                    ForbiddenEqualsClause(cs.get_hyperparameter(
                        "classifier:__choice__"), c)))
                break
            except KeyError:
                break
            except ValueError:
                # Change the default and try again
                try:
                    default = possible_default_classifier.pop()
                except IndexError:
                    raise ValueError(
                        "Cannot find a legal default configuration.")
                cs.get_hyperparameter(
                    'classifier:__choice__').default = default

    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    """Return the ConfigurationSpace for a deep feed-forward network.

    Builds training hyperparameters (batch size, epochs, learning rate,
    L2 weight, output dropout), solver- and learning-rate-policy
    parameters, and per-layer parameters for up to ``max_num_layers - 1``
    hidden layers, together with the conditions that activate each
    parameter only when its layer / solver / policy is actually selected.

    Parameters
    ----------
    dataset_properties : dict, optional
        Unused here; kept so the signature matches the other
        ``get_hyperparameter_search_space`` implementations.

    Returns
    -------
    ConfigurationSpace
        The fully conditioned search space.
    """
    max_num_layers = 7  # Maximum number of layers coded

    # Hacky way to condition layers params based on the number of layers
    # 'c'=1, 'd'=2, 'e'=3 ,'f'=4', g ='5', h='6' + output_layer
    layer_choices = [chr(i) for i in range(ord('c'),
                                           ord('b') + max_num_layers)]

    batch_size = UniformIntegerHyperparameter("batch_size", 32, 4096,
                                              log=True, default=32)
    number_epochs = UniformIntegerHyperparameter("number_epochs", 2, 80,
                                                 default=5)
    num_layers = CategoricalHyperparameter("num_layers",
                                           choices=layer_choices,
                                           default='c')
    lr = UniformFloatHyperparameter("learning_rate", 1e-6, 1.0, log=True,
                                    default=0.01)
    l2 = UniformFloatHyperparameter("lambda2", 1e-7, 1e-2, log=True,
                                    default=1e-4)
    dropout_output = UniformFloatHyperparameter("dropout_output", 0.0, 0.99,
                                                default=0.5)

    # Define basic hyperparameters and define the config space
    # basic means that are independent from the number of layers
    cs = ConfigurationSpace()
    cs.add_hyperparameter(number_epochs)
    cs.add_hyperparameter(batch_size)
    cs.add_hyperparameter(num_layers)
    cs.add_hyperparameter(lr)
    cs.add_hyperparameter(l2)
    cs.add_hyperparameter(dropout_output)

    # Define parameters with different child parameters and conditions
    solver_choices = ["adam", "adadelta", "adagrad", "sgd", "momentum",
                      "nesterov", "smorm3s"]
    solver = CategoricalHyperparameter(name="solver",
                                       choices=solver_choices,
                                       default="smorm3s")
    beta1 = UniformFloatHyperparameter("beta1", 1e-4, 0.1, log=True,
                                       default=0.1)
    beta2 = UniformFloatHyperparameter("beta2", 1e-4, 0.1, log=True,
                                       default=0.01)
    rho = UniformFloatHyperparameter("rho", 0.05, 0.99, log=True,
                                     default=0.95)
    momentum = UniformFloatHyperparameter("momentum", 0.3, 0.999,
                                          default=0.9)

    # TODO: Add policy based on this sklearn sgd
    policy_choices = ['fixed', 'inv', 'exp', 'step']
    lr_policy = CategoricalHyperparameter(name="lr_policy",
                                          choices=policy_choices,
                                          default='fixed')
    gamma = UniformFloatHyperparameter(name="gamma", lower=1e-3, upper=1e-1,
                                       default=1e-2)
    power = UniformFloatHyperparameter("power", 0.0, 1.0, default=0.5)
    epoch_step = UniformIntegerHyperparameter("epoch_step", 2, 20,
                                              default=5)

    cs.add_hyperparameter(solver)
    cs.add_hyperparameter(beta1)
    cs.add_hyperparameter(beta2)
    cs.add_hyperparameter(momentum)
    cs.add_hyperparameter(rho)
    cs.add_hyperparameter(lr_policy)
    cs.add_hyperparameter(gamma)
    cs.add_hyperparameter(power)
    cs.add_hyperparameter(epoch_step)

    # Define parameters that are needed it for each layer
    # NOTE(review): output_activation_choices is defined but never used in
    # this function — presumably intended for an output-layer parameter.
    output_activation_choices = ['softmax', 'sigmoid', 'softplus', 'tanh']
    activations_choices = ['sigmoid', 'tanh', 'scaledTanh', 'elu', 'relu',
                           'leaky', 'linear']
    weight_choices = ['constant', 'normal', 'uniform', 'glorot_normal',
                      'glorot_uniform', 'he_normal', 'he_uniform',
                      'ortogonal', 'sparse']

    # Iterate over parameters that are used in each layer
    for i in range(1, max_num_layers):
        layer_units = UniformIntegerHyperparameter(
            "num_units_layer_" + str(i), 64, 4096, log=True, default=128)
        cs.add_hyperparameter(layer_units)
        layer_dropout = UniformFloatHyperparameter(
            "dropout_layer_" + str(i), 0.0, 0.99, default=0.5)
        cs.add_hyperparameter(layer_dropout)
        weight_initialization = CategoricalHyperparameter(
            'weight_init_' + str(i), choices=weight_choices,
            default='he_normal')
        cs.add_hyperparameter(weight_initialization)
        layer_std = UniformFloatHyperparameter(
            "std_layer_" + str(i), 1e-6, 0.1, log=True, default=0.005)
        cs.add_hyperparameter(layer_std)
        layer_activation = CategoricalHyperparameter(
            "activation_layer_" + str(i), choices=activations_choices,
            default="relu")
        cs.add_hyperparameter(layer_activation)
        layer_leakiness = UniformFloatHyperparameter(
            'leakiness_layer_' + str(i), 0.01, 0.99, default=0.3)
        cs.add_hyperparameter(layer_leakiness)
        layer_tanh_alpha = UniformFloatHyperparameter(
            'tanh_alpha_layer_' + str(i), 0.5, 1.0, default=2. / 3.)
        cs.add_hyperparameter(layer_tanh_alpha)
        layer_tanh_beta = UniformFloatHyperparameter(
            'tanh_beta_layer_' + str(i), 1.1, 3.0, log=True,
            default=1.7159)
        cs.add_hyperparameter(layer_tanh_beta)

    # Layer 1 (i == 1) is always present, so only layers >= 2 are
    # conditioned on the num_layers choice below.
    # TODO: Could be in a function in a new module
    for i in range(2, max_num_layers):
        # Condition layers parameter on layer choice
        layer_unit_param = cs.get_hyperparameter("num_units_layer_" + str(i))
        layer_cond = InCondition(child=layer_unit_param, parent=num_layers,
                                 values=[l for l in layer_choices[i - 1:]])
        cs.add_condition(layer_cond)

        # Condition dropout parameter on layer choice
        layer_dropout_param = cs.get_hyperparameter("dropout_layer_" + str(i))
        layer_cond = InCondition(child=layer_dropout_param, parent=num_layers,
                                 values=[l for l in layer_choices[i - 1:]])
        cs.add_condition(layer_cond)

        # Condition weight initialization on layer choice
        layer_weight_param = cs.get_hyperparameter("weight_init_" + str(i))
        layer_cond = InCondition(child=layer_weight_param, parent=num_layers,
                                 values=[l for l in layer_choices[i - 1:]])
        cs.add_condition(layer_cond)

        # Condition std parameter on weight layer initialization choice
        layer_std_param = cs.get_hyperparameter("std_layer_" + str(i))
        weight_cond = EqualsCondition(child=layer_std_param,
                                      parent=layer_weight_param,
                                      value='normal')
        cs.add_condition(weight_cond)

        # Condition activation parameter on layer choice
        layer_activation_param = cs.get_hyperparameter(
            "activation_layer_" + str(i))
        layer_cond = InCondition(child=layer_activation_param,
                                 parent=num_layers,
                                 values=[l for l in layer_choices[i - 1:]])
        cs.add_condition(layer_cond)

        # Condition leakiness on activation choice
        layer_leakiness_param = cs.get_hyperparameter(
            "leakiness_layer_" + str(i))
        activation_cond = EqualsCondition(child=layer_leakiness_param,
                                          parent=layer_activation_param,
                                          value='leaky')
        cs.add_condition(activation_cond)

        # Condition tanh on activation choice
        layer_tanh_alpha_param = cs.get_hyperparameter(
            "tanh_alpha_layer_" + str(i))
        activation_cond = EqualsCondition(child=layer_tanh_alpha_param,
                                          parent=layer_activation_param,
                                          value='scaledTanh')
        cs.add_condition(activation_cond)
        layer_tanh_beta_param = cs.get_hyperparameter(
            "tanh_beta_layer_" + str(i))
        activation_cond = EqualsCondition(child=layer_tanh_beta_param,
                                          parent=layer_activation_param,
                                          value='scaledTanh')
        cs.add_condition(activation_cond)

    # Conditioning on solver
    momentum_depends_on_solver = InCondition(
        momentum, solver, values=["momentum", "nesterov"])
    beta1_depends_on_solver = EqualsCondition(beta1, solver, "adam")
    beta2_depends_on_solver = EqualsCondition(beta2, solver, "adam")
    rho_depends_on_solver = EqualsCondition(rho, solver, "adadelta")
    cs.add_condition(momentum_depends_on_solver)
    cs.add_condition(beta1_depends_on_solver)
    cs.add_condition(beta2_depends_on_solver)
    cs.add_condition(rho_depends_on_solver)

    # Conditioning on learning rate policy
    lr_policy_depends_on_solver = InCondition(
        lr_policy, solver,
        ["adadelta", "adagrad", "sgd", "momentum", "nesterov"])
    gamma_depends_on_policy = InCondition(child=gamma, parent=lr_policy,
                                          values=["inv", "exp", "step"])
    power_depends_on_policy = EqualsCondition(power, lr_policy, "inv")
    epoch_step_depends_on_policy = EqualsCondition(epoch_step, lr_policy,
                                                   "step")
    cs.add_condition(lr_policy_depends_on_solver)
    cs.add_condition(gamma_depends_on_policy)
    cs.add_condition(power_depends_on_policy)
    cs.add_condition(epoch_step_depends_on_policy)
    return cs
def get_hyperparameter_search_space(cls, include=None, exclude=None,
                                    dataset_properties=None):
    """Return the configuration space for the regression CASH problem.

    Parameters
    ----------
    include : dict, optional
        If given, restricts the components considered per pipeline step.
        Cannot be used together with ``exclude``.
    exclude : dict, optional
        If given, removes the listed components per pipeline step.
        Cannot be used together with ``include``.
    dataset_properties : dict, optional
        Properties of the dataset; ``target_type`` is forced to
        ``'regression'`` and ``sparse`` defaults to ``False``.

    Returns
    -------
    cs : ConfigurationSpace
        The configuration space describing the simple regression
        pipeline, with forbidden clauses for illegal
        regressor/preprocessor combinations.
    """
    cs = ConfigurationSpace()

    # Normalise dataset_properties and force the regression target type.
    if dataset_properties is None or not isinstance(
            dataset_properties, dict):
        dataset_properties = dict()
    if not 'target_type' in dataset_properties:
        dataset_properties['target_type'] = 'regression'
    if dataset_properties['target_type'] != 'regression':
        dataset_properties['target_type'] = 'regression'
    if 'sparse' not in dataset_properties:
        # This dataset is probaby dense
        dataset_properties['sparse'] = False

    pipeline = cls._get_pipeline()
    cs = cls._get_hyperparameter_search_space(cs, dataset_properties,
                                              exclude, include, pipeline)

    regressors = cs.get_hyperparameter('regressor:__choice__').choices
    preprocessors = cs.get_hyperparameter(
        'preprocessor:__choice__').choices
    # Pipeline layout: last step is the regressor choice, second-to-last
    # the preprocessor choice.
    available_regressors = pipeline[-1][1].get_available_components(
        dataset_properties)
    # NOTE(review): available_preprocessors is computed but unused below.
    available_preprocessors = pipeline[-2][1].get_available_components(
        dataset_properties)

    # Candidate fallback defaults, tried in pop() order whenever a
    # forbidden clause would make the current default illegal.
    possible_default_regressor = copy.copy(
        list(available_regressors.keys()))
    default = cs.get_hyperparameter('regressor:__choice__').default
    del possible_default_regressor[possible_default_regressor.index(
        default)]

    # A regressor which can handle sparse data after the densifier
    for key in regressors:
        if SPARSE in available_regressors[key].get_properties(
                dataset_properties=None)['input']:
            if 'densifier' in preprocessors:
                # add_forbidden_clause raises ValueError if the clause
                # forbids the current default; retry with a new default.
                while True:
                    try:
                        cs.add_forbidden_clause(
                            ForbiddenAndConjunction(
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(
                                        'regressor:__choice__'), key),
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(
                                        'preprocessor:__choice__'),
                                    'densifier')))
                        break
                    except ValueError:
                        # Change the default and try again
                        try:
                            default = possible_default_regressor.pop()
                        except IndexError:
                            raise ValueError(
                                "Cannot find a legal default configuration."
                            )
                        cs.get_hyperparameter(
                            'regressor:__choice__').default = default

    # which would take too long
    # Combinations of tree-based models with feature learning:
    regressors_ = [
        "adaboost", "decision_tree", "extra_trees", "gaussian_process",
        "gradient_boosting", "k_nearest_neighbors", "random_forest"
    ]
    feature_learning_ = ["kitchen_sinks", "kernel_pca", "nystroem_sampler"]

    for r, f in product(regressors_, feature_learning_):
        if r not in regressors:
            continue
        if f not in preprocessors:
            continue
        while True:
            try:
                cs.add_forbidden_clause(
                    ForbiddenAndConjunction(
                        ForbiddenEqualsClause(
                            cs.get_hyperparameter("regressor:__choice__"),
                            r),
                        ForbiddenEqualsClause(
                            cs.get_hyperparameter(
                                "preprocessor:__choice__"), f)))
                break
            except KeyError:
                # Component not present in this space — skip the pair.
                break
            except ValueError:
                # Change the default and try again
                try:
                    default = possible_default_regressor.pop()
                except IndexError:
                    raise ValueError(
                        "Cannot find a legal default configuration.")
                cs.get_hyperparameter(
                    'regressor:__choice__').default = default

    return cs
def read(pcs_string, debug=False):
    """Parse a SMAC pcs description into a ConfigurationSpace.

    Iterates over the lines of ``pcs_string``, stripping comments and
    quotes, and dispatches each line to one of three parsers:
    conditions (lines containing ``|``), forbidden clauses (lines
    wrapped in ``{}``), or hyperparameter definitions (continuous via
    ``pp_cont_param``, categorical via ``pp_cat_param``). Conditions and
    forbidden clauses are attached after all hyperparameters exist.

    Parameters
    ----------
    pcs_string : iterable of str
        Lines of a pcs file.
    debug : bool, optional
        Unused; kept for interface compatibility.

    Returns
    -------
    ConfigurationSpace

    Raises
    ------
    NotImplementedError
        If a line or condition cannot be parsed, or a forbidden clause
        uses an operator other than ``=``.
    """
    configuration_space = ConfigurationSpace()
    conditions = []
    forbidden = []

    # some statistics
    ct = 0
    cont_ct = 0
    cat_ct = 0
    line_ct = 0

    for line in pcs_string:
        line_ct += 1

        if "#" in line:
            # It contains a comment
            pos = line.find("#")
            line = line[:pos]

        # Remove quotes and whitespaces at beginning and end
        line = line.replace('"', "").replace("'", "")
        line = line.strip()

        if "|" in line:
            # It's a condition
            try:
                c = pp_condition.parseString(line)
                conditions.append(c)
            except pyparsing.ParseException:
                raise NotImplementedError("Could not parse condition: %s" %
                                          line)
            continue
        # Hyperparameter lines always contain a "}" or "]" range; anything
        # else (after stripping) is not parseable here.
        if "}" not in line and "]" not in line:
            print("Skipping: %s" % line)
            continue
        if line.startswith("{") and line.endswith("}"):
            forbidden.append(line)
            continue
        if len(line.strip()) == 0:
            continue

        ct += 1
        param = None
        # print "Parsing: " + line

        create = {"int": UniformIntegerHyperparameter,
                  "float": UniformFloatHyperparameter,
                  "categorical": CategoricalHyperparameter}

        # Try continuous first; on ParseException fall through to the
        # categorical parser.
        try:
            param_list = pp_cont_param.parseString(line)
            # Trailing flags ("i" = integer, "l" = log scale) appear after
            # the first nine tokens.
            il = param_list[9:]
            if len(il) > 0:
                il = il[0]
            param_list = param_list[:9]
            name = param_list[0]
            lower = float(param_list[2])
            upper = float(param_list[4])
            paramtype = "int" if "i" in il else "float"
            log = True if "l" in il else False
            default = float(param_list[7])
            param = create[paramtype](name=name, lower=lower, upper=upper,
                                      q=None, log=log, default=default)
            cont_ct += 1
        except pyparsing.ParseException:
            pass

        try:
            param_list = pp_cat_param.parseString(line)
            name = param_list[0]
            # Choices are every other token; the default is second-to-last.
            choices = [c for c in param_list[2:-4:2]]
            default = param_list[-2]
            param = create["categorical"](name=name, choices=choices,
                                          default=default)
            cat_ct += 1
        except pyparsing.ParseException:
            pass

        if param is None:
            raise NotImplementedError("Could not parse: %s" % line)

        configuration_space.add_hyperparameter(param)

    for clause in forbidden:
        # TODO test this properly!
        # TODO Add a try/catch here!
        # noinspection PyUnusedLocal
        param_list = pp_forbidden_clause.parseString(clause)
        tmp_list = []
        clause_list = []
        # Tokens come in (name, op, value) triples.
        for value in param_list[1:]:
            if len(tmp_list) < 3:
                tmp_list.append(value)
            else:
                # So far, only equals is supported by SMAC
                if tmp_list[1] == '=':
                    # TODO maybe add a check if the hyperparameter is
                    # actually in the configuration space
                    clause_list.append(ForbiddenEqualsClause(
                        configuration_space.get_hyperparameter(tmp_list[0]),
                        tmp_list[2]))
                else:
                    raise NotImplementedError()
                tmp_list = []
        configuration_space.add_forbidden_clause(ForbiddenAndConjunction(
            *clause_list))

    # Now handle conditions
    # If there are two conditions for one child, these two conditions are an
    # AND-conjunction of conditions, thus we have to connect them
    conditions_per_child = defaultdict(list)
    for condition in conditions:
        child_name = condition[0]
        conditions_per_child[child_name].append(condition)

    for child_name in conditions_per_child:
        condition_objects = []
        for condition in conditions_per_child[child_name]:
            child = configuration_space.get_hyperparameter(child_name)
            parent_name = condition[2]
            parent = configuration_space.get_hyperparameter(parent_name)
            restrictions = condition[5:-1:2]

            # TODO: cast the type of the restriction!
            if len(restrictions) == 1:
                condition = EqualsCondition(child, parent, restrictions[0])
            else:
                condition = InCondition(child, parent, values=restrictions)
            condition_objects.append(condition)

        # Now we have all condition objects for this child, so we can build a
        # giant AND-conjunction of them (if number of conditions >= 2)!
        if len(condition_objects) > 1:
            and_conjunction = AndConjunction(*condition_objects)
            configuration_space.add_condition(and_conjunction)
        else:
            configuration_space.add_condition(condition_objects[0])

    return configuration_space
def read(pcs_string, debug=False):
    """Build a ``ConfigurationSpace`` from the lines of a SMAC pcs file.

    Parameters
    ----------
    pcs_string : iterable of str
        Lines of a pcs-format parameter file.
    debug : bool
        Accepted for interface compatibility; not used here.

    Returns
    -------
    ConfigurationSpace
        The space with all hyperparameters, forbidden clauses and
        conditions parsed from the input.

    Raises
    ------
    NotImplementedError
        For unparseable lines/conditions or forbidden clauses that use an
        operator other than ``=``.
    """
    cs = ConfigurationSpace()
    condition_tokens = []
    forbidden_lines = []

    # Statistics counters.
    n_params = 0
    n_continuous = 0
    n_categorical = 0
    n_lines = 0

    # Hyperparameter constructors keyed by domain kind.
    constructors = {"int": UniformIntegerHyperparameter,
                    "float": UniformFloatHyperparameter,
                    "categorical": CategoricalHyperparameter}

    for raw in pcs_string:
        n_lines += 1

        # Cut off an inline comment, if present.
        hash_pos = raw.find("#")
        if hash_pos >= 0:
            raw = raw[:hash_pos]

        # Drop quoting and surrounding whitespace.
        raw = raw.replace('"', "").replace("'", "").strip()

        if "|" in raw:
            # Conditional-parameter line; resolved after all params exist.
            try:
                condition_tokens.append(pp_condition.parseString(raw))
            except pyparsing.ParseException:
                raise NotImplementedError("Could not parse condition: %s" %
                                          raw)
            continue
        if "}" not in raw and "]" not in raw:
            # No domain on the line -> nothing we can parse.
            print("Skipping: %s" % raw)
            continue
        if raw.startswith("{") and raw.endswith("}"):
            # Fully-braced line is a forbidden clause; handle later.
            forbidden_lines.append(raw)
            continue
        if not raw.strip():
            continue

        n_params += 1
        hyperparameter = None

        # Attempt 1: continuous (integer or float) parameter.
        try:
            tokens = pp_cont_param.parseString(raw)
            flags = tokens[9:]
            if len(flags) > 0:
                flags = flags[0]
            tokens = tokens[:9]
            kind = "int" if "i" in flags else "float"
            hyperparameter = constructors[kind](name=tokens[0],
                                                lower=float(tokens[2]),
                                                upper=float(tokens[4]),
                                                q=None,
                                                log="l" in flags,
                                                default=float(tokens[7]))
            n_continuous += 1
        except pyparsing.ParseException:
            pass

        # Attempt 2: categorical parameter.
        try:
            tokens = pp_cat_param.parseString(raw)
            hyperparameter = constructors["categorical"](
                name=tokens[0],
                choices=list(tokens[2:-4:2]),
                default=tokens[-2])
            n_categorical += 1
        except pyparsing.ParseException:
            pass

        if hyperparameter is None:
            raise NotImplementedError("Could not parse: %s" % raw)

        cs.add_hyperparameter(hyperparameter)

    for clause in forbidden_lines:
        # TODO test this properly!
        # TODO Add a try/catch here!
        tokens = pp_forbidden_clause.parseString(clause)
        triple = []
        equals_clauses = []
        for token in tokens[1:]:
            if len(triple) < 3:
                triple.append(token)
                continue
            # A full (name, op, value) triple is flushed here; the token
            # that triggered the flush is itself dropped.
            # So far, only equals is supported by SMAC.
            if triple[1] != '=':
                raise NotImplementedError()
            # TODO maybe add a check if the hyperparameter is actually in
            # the configuration space
            equals_clauses.append(ForbiddenEqualsClause(
                cs.get_hyperparameter(triple[0]), triple[2]))
            triple = []
        cs.add_forbidden_clause(ForbiddenAndConjunction(*equals_clauses))

    # Now handle conditions. Several conditions on the same child form an
    # AND-conjunction, so group them by child first.
    by_child = defaultdict(list)
    for tokens in condition_tokens:
        by_child[tokens[0]].append(tokens)

    for child_name in by_child:
        built = []
        for tokens in by_child[child_name]:
            child = cs.get_hyperparameter(child_name)
            parent = cs.get_hyperparameter(tokens[2])
            values = tokens[5:-1:2]
            # TODO: cast the type of the restriction!
            if len(values) == 1:
                built.append(EqualsCondition(child, parent, values[0]))
            else:
                built.append(InCondition(child, parent, values=values))

        # One condition goes in directly, several as an AND-conjunction.
        if len(built) > 1:
            cs.add_condition(AndConjunction(*built))
        else:
            cs.add_condition(built[0])
    return cs
def get_hyperparameter_search_space(cls, estimator_name,
                                    default_estimator,
                                    estimator_components,
                                    preprocessor_components,
                                    dataset_properties,
                                    always_active):
    """Return the configuration space for the CASH problem.

    This method should be called by the method
    get_hyperparameter_search_space of a subclass. After the subclass
    assembles a list of available estimators and preprocessor components,
    _get_hyperparameter_search_space can be called to do the work of
    creating the actual
    HPOlibConfigSpace.configuration_space.ConfigurationSpace object.

    Parameters
    ----------
    estimator_name : str
        Name of the estimator hyperparameter which will be used in the
        configuration space. For a classification task, this would be
        'classifier'.

    default_estimator : str
        Default value for the estimator hyperparameter; if None, the
        first available estimator is used.

    estimator_components : dict {name: component}
        Dictionary with all estimator components to be included in the
        configuration space.

    preprocessor_components : dict {name: component}
        Dictionary with all preprocessor components to be included in the
        configuration space.

    dataset_properties : dict
        Forwarded to each component's get_hyperparameter_search_space.

    always_active : list of str
        A list of components which will always be active in the pipeline.
        This is useful for components like imputation which have
        hyperparameters to be configured, but which do not have any parent.

    Returns
    -------
    cs : HPOlibConfigSpace.configuration_space.ConfigurationSpace
        The configuration space describing the AutoSklearnClassifier.
    """
    cs = ConfigurationSpace()

    available_estimators = estimator_components
    available_preprocessors = preprocessor_components

    if default_estimator is None:
        # list() keeps this working on Python 3, where dict views are
        # not indexable; on Python 2 the behavior is unchanged.
        default_estimator = list(available_estimators.keys())[0]

    estimator = CategoricalHyperparameter(estimator_name,
                                          list(available_estimators.keys()),
                                          default=default_estimator)
    cs.add_hyperparameter(estimator)
    for name in available_estimators.keys():
        # We have to retrieve the configuration space every time because
        # we change the objects it returns. If we reused it, we could not
        # retrieve the conditions further down.
        # TODO implement copy for hyperparameters and forbidden and
        # conditions!
        estimator_configuration_space = available_estimators[name]. \
            get_hyperparameter_search_space(dataset_properties)
        for parameter in estimator_configuration_space.get_hyperparameters():
            new_parameter = copy.deepcopy(parameter)
            # Prefix sub-space parameter names with the component name.
            new_parameter.name = "%s:%s" % (name, new_parameter.name)
            cs.add_hyperparameter(new_parameter)
            # We must only add a condition if the hyperparameter is not
            # conditional on something else
            if len(estimator_configuration_space.
                    get_parents_of(parameter)) == 0:
                condition = EqualsCondition(new_parameter, estimator, name)
                cs.add_condition(condition)

        for condition in available_estimators[name]. \
                get_hyperparameter_search_space(dataset_properties). \
                get_conditions():
            dlcs = condition.get_descendant_literal_conditions()
            for dlc in dlcs:
                if not dlc.child.name.startswith(name):
                    dlc.child.name = "%s:%s" % (name, dlc.child.name)
                if not dlc.parent.name.startswith(name):
                    dlc.parent.name = "%s:%s" % (name, dlc.parent.name)
            cs.add_condition(condition)

        for forbidden_clause in available_estimators[name]. \
                get_hyperparameter_search_space(dataset_properties). \
                forbidden_clauses:
            dlcs = forbidden_clause.get_descendant_literal_clauses()
            for dlc in dlcs:
                if not dlc.hyperparameter.name.startswith(name):
                    dlc.hyperparameter.name = "%s:%s" % \
                        (name, dlc.hyperparameter.name)
            cs.add_forbidden_clause(forbidden_clause)

    # list() because filter() returns an iterator on Python 3; on
    # Python 2 the resulting list is identical.
    preprocessor_choices = list(filter(lambda app: app not in always_active,
                                       available_preprocessors.keys()))
    preprocessor = CategoricalHyperparameter("preprocessor",
                                             ["None"] + preprocessor_choices,
                                             default='None')
    cs.add_hyperparameter(preprocessor)
    for name in available_preprocessors.keys():
        preprocessor_configuration_space = available_preprocessors[name]. \
            get_hyperparameter_search_space(dataset_properties)
        for parameter in preprocessor_configuration_space. \
                get_hyperparameters():
            new_parameter = copy.deepcopy(parameter)
            new_parameter.name = "%s:%s" % (name, new_parameter.name)
            cs.add_hyperparameter(new_parameter)
            # We must only add a condition if the hyperparameter is not
            # conditional on something else; always-active components get
            # no parent condition at all.
            if len(preprocessor_configuration_space.get_parents_of(
                    parameter)) == 0 and name not in always_active:
                condition = EqualsCondition(new_parameter, preprocessor,
                                            name)
                cs.add_condition(condition)

        for condition in available_preprocessors[name]. \
                get_hyperparameter_search_space(dataset_properties). \
                get_conditions():
            # Fixed: was get_descendent_literal_conditions (a typo); the
            # estimator loop above uses the 'descendant' spelling.
            dlcs = condition.get_descendant_literal_conditions()
            for dlc in dlcs:
                if not dlc.child.name.startswith(name):
                    dlc.child.name = "%s:%s" % (name, dlc.child.name)
                if not dlc.parent.name.startswith(name):
                    dlc.parent.name = "%s:%s" % (name, dlc.parent.name)
            cs.add_condition(condition)

        for forbidden_clause in available_preprocessors[name]. \
                get_hyperparameter_search_space(dataset_properties). \
                forbidden_clauses:
            dlcs = forbidden_clause.get_descendant_literal_clauses()
            for dlc in dlcs:
                # Fixed: was dlc.hyperparameter.startwith(name), which
                # raised AttributeError (missing .name, misspelled method).
                if not dlc.hyperparameter.name.startswith(name):
                    dlc.hyperparameter.name = "%s:%s" % \
                        (name, dlc.hyperparameter.name)
            cs.add_forbidden_clause(forbidden_clause)

    # Now try to add things for which we know that they don't work; this is
    # best-effort, because the referenced components may not be present in
    # this particular space (get_hyperparameter raises KeyError then).
    try:
        cs.add_forbidden_clause(ForbiddenAndConjunction(
            ForbiddenEqualsClause(cs.get_hyperparameter(
                "select_percentile_classification:score_func"), "chi2"),
            ForbiddenEqualsClause(cs.get_hyperparameter(
                "rescaling:strategy"), "standard")
        ))
    except Exception:
        # Deliberate silent skip: the clause only applies when both
        # components are in the space.
        pass

    return cs
def read(pcs_string, debug=False):
    """Parse a SMAC ``.pcs`` parameter file into a ``ConfigurationSpace``.

    This is the older variant of the parser: conditions are added one by
    one and are not grouped into AND-conjunctions per child.

    Parameters
    ----------
    pcs_string : iterable of str
        Lines of a pcs-format parameter file.
    debug : bool
        If True, print parsing statistics after reading.

    Returns
    -------
    ConfigurationSpace
        The space with all parsed hyperparameters, forbidden clauses and
        conditions.

    Raises
    ------
    NotImplementedError
        For unparseable lines/conditions or forbidden clauses using an
        operator other than ``=``.
    """
    configuration_space = ConfigurationSpace()
    conditions = []
    forbidden = []

    # some statistics
    ct = 0        # parameter lines seen
    cont_ct = 0   # continuous parameters parsed
    cat_ct = 0    # categorical parameters parsed
    line_ct = 0   # total lines read

    for line in pcs_string:
        line_ct += 1

        if "#" in line:
            # It contains a comment; strip everything after the first '#'.
            pos = line.find("#")
            line = line[:pos]

        # Remove quotes and whitespaces at beginning and end
        line = line.replace('"', "").replace("'", "")
        line = line.strip()

        if "|" in line:
            # It's a condition
            try:
                c = pp_condition.parseString(line)
                conditions.append(c)
            except pyparsing.ParseException:
                raise NotImplementedError("Could not parse condition: %s" %
                                          line)
            continue
        if "}" not in line and "]" not in line:
            # Single-argument print() is valid (and identical) under both
            # Python 2 and 3; the other read() variants already use it.
            print("Skipping: %s" % line)
            continue
        if line.startswith("{") and line.endswith("}"):
            forbidden.append(line)
            continue
        if len(line.strip()) == 0:
            continue

        ct += 1
        param = None
        # print "Parsing: " + line

        create = {"int": UniformIntegerHyperparameter,
                  "float": UniformFloatHyperparameter,
                  "categorical": CategoricalHyperparameter}

        # First, try to parse the line as a continuous parameter.
        try:
            param_list = pp_cont_param.parseString(line)
            il = param_list[9:]
            if len(il) > 0:
                il = il[0]
            param_list = param_list[:9]
            name = param_list[0]
            lower = float(param_list[2])
            upper = float(param_list[4])
            # 'i' flags an integer domain, 'l' a log scale.
            paramtype = "int" if "i" in il else "float"
            log = True if "l" in il else False
            default = float(param_list[7])
            param = create[paramtype](name=name, lower=lower, upper=upper,
                                      q=None, log=log, default=default)
            cont_ct += 1
        except pyparsing.ParseException:
            pass

        # Otherwise, try categorical.
        try:
            param_list = pp_cat_param.parseString(line)
            name = param_list[0]
            choices = [c for c in param_list[2:-4:2]]
            default = param_list[-2]
            param = create["categorical"](name=name, choices=choices,
                                          default=default)
            cat_ct += 1
        except pyparsing.ParseException:
            pass

        if param is None:
            raise NotImplementedError("Could not parse: %s" % line)

        configuration_space.add_hyperparameter(param)

    for clause in forbidden:
        # TODO test this properly!
        # TODO Add a try/catch here!
        # noinspection PyUnusedLocal
        param_list = pp_forbidden_clause.parseString(clause)
        tmp_list = []
        clause_list = []
        for value in param_list[1:]:
            if len(tmp_list) < 3:
                tmp_list.append(value)
            else:
                # So far, only equals is supported by SMAC
                if tmp_list[1] == '=':
                    # TODO maybe add a check if the hyperparameter is
                    # actually in the configuration space
                    clause_list.append(ForbiddenEqualsClause(
                        configuration_space.get_hyperparameter(tmp_list[0]),
                        tmp_list[2]))
                else:
                    raise NotImplementedError()
                tmp_list = []
        configuration_space.add_forbidden_clause(ForbiddenAndConjunction(
            *clause_list))

    # Now handle conditions (one at a time; no AND-conjunction grouping in
    # this variant).
    for condition in conditions:
        child_name = condition[0]
        child = configuration_space.get_hyperparameter(child_name)
        parent_name = condition[2]
        parent = configuration_space.get_hyperparameter(parent_name)
        restrictions = condition[5:-1:2]
        # TODO: cast the type of the restriction!
        if len(restrictions) == 1:
            condition = EqualsCondition(child, parent, restrictions[0])
        else:
            condition = InCondition(child, parent, values=restrictions)
        configuration_space.add_condition(condition)

    if debug:
        # Fixed: this block previously referenced an undefined name
        # 'searchspace' (and a nonexistent .conditions attribute), so
        # debug=True always raised NameError. It now reports the same
        # statistics from names that are actually in scope.
        hyperparameters = configuration_space.get_hyperparameters()
        print("")
        print("============== Reading Results")
        print("First 10 lines:")
        sp_list = ["%s: %s" % (hp.name, str(hp)) for hp in hyperparameters]
        print("\n".join(sp_list[:10]))
        print("")
        print("#Invalid lines: %d ( of %d )" %
              (line_ct - len(conditions) - ct, line_ct))
        print("#Parameter: %d" % len(hyperparameters))
        print("#Conditions: %d" % len(conditions))
        print("#Categorical: %d" % cat_ct)
        print("#Continuous: %d" % cont_ct)

    return configuration_space