def get_hyperparameter_search_space(cls, dataset_properties, default=None,
                                    include=None, exclude=None):
    """Build the search space that selects one of the available preprocessors.

    The returned space contains a ``__choice__`` categorical over every
    available component, and each component's own search space merged in
    through ``add_component_deepcopy`` under the component's name.

    Parameters
    ----------
    dataset_properties
        Forwarded to ``cls.get_available_components`` (as ``data_prop``) and
        to every component's ``get_hyperparameter_search_space``.
    default
        Default value of ``__choice__``. When ``None``, the first available
        entry of a fixed preference list is used; it stays ``None`` if none
        of them is available.
    include, exclude
        Forwarded unchanged to ``cls.get_available_components``.

    Raises
    ------
    ValueError
        If no component is available.
    """
    # Compile a list of legal preprocessors for this problem
    components = cls.get_available_components(
        data_prop=dataset_properties, include=include, exclude=exclude)
    if len(components) == 0:
        raise ValueError(
            "No preprocessors found, please add no_preprocessing")

    if default is None:
        preferred = ('no_preprocessing', 'select_percentile', 'pca',
                     'truncatedSVD')
        default = next(
            (candidate for candidate in preferred if candidate in components),
            None)

    space = ConfigurationSpace()
    choice = CategoricalHyperparameter(
        '__choice__', list(components.keys()), default=default)
    space.add_hyperparameter(choice)

    for component_name in components:
        component = components[component_name]
        sub_space = component.get_hyperparameter_search_space(
            dataset_properties)
        space = add_component_deepcopy(space, component_name, sub_space)

    return space
def get_hyperparameter_search_space(dataset_properties=None):
    """Return the configuration space for this component.

    Tunable: ``loss``, ``learning_rate`` (log scale), ``n_estimators``,
    ``max_depth``, ``min_samples_split``, ``min_samples_leaf``, ``subsample``,
    ``max_features`` and ``alpha``. Fixed: ``min_weight_fraction_leaf``,
    ``max_leaf_nodes`` and ``min_impurity_decrease``.

    ``alpha`` is only active while ``loss`` is ``'huber'`` or ``'quantile'``.
    ``dataset_properties`` is accepted but not read.
    """
    # The two hyperparameters tied together by the condition get names; the
    # rest are built in place, in the same order they are registered.
    loss = CategoricalHyperparameter(
        name="loss",
        choices=["ls", "lad", "huber", "quantile"],
        default_value="ls",
    )
    alpha = UniformFloatHyperparameter(
        name="alpha", lower=0.75, upper=0.99, default_value=0.9)

    space = ConfigurationSpace()
    space.add_hyperparameters([
        loss,
        UniformFloatHyperparameter(
            name="learning_rate", lower=0.01, upper=1, default_value=0.1,
            log=True),
        UniformIntegerHyperparameter(
            name="n_estimators", lower=50, upper=500, default_value=100),
        UniformIntegerHyperparameter(
            name="max_depth", lower=1, upper=10, default_value=3),
        UniformIntegerHyperparameter(
            name="min_samples_split", lower=2, upper=20, default_value=2,
            log=False),
        UniformIntegerHyperparameter(
            name="min_samples_leaf", lower=1, upper=20, default_value=1,
            log=False),
        UnParametrizedHyperparameter(
            name="min_weight_fraction_leaf", value=0.),
        UniformFloatHyperparameter(
            name="subsample", lower=0.01, upper=1.0, default_value=1.0,
            log=False),
        UniformFloatHyperparameter(
            name="max_features", lower=0.1, upper=1.0, default_value=1),
        UnParametrizedHyperparameter(name="max_leaf_nodes", value="None"),
        UnParametrizedHyperparameter(
            name='min_impurity_decrease', value=0.0),
        alpha,
    ])
    space.add_condition(InCondition(alpha, loss, ['huber', 'quantile']))
    return space
def get_hyperparameter_search_space(dataset_properties=None):
    """Return a space with integer ``N`` in [5, 20] and float ``precond`` in [0, 0.5].

    Defaults are 10 and 0.1 respectively. ``dataset_properties`` is accepted
    but not read.
    """
    space = ConfigurationSpace()
    space.add_hyperparameter(
        UniformIntegerHyperparameter(name="N", lower=5, upper=20, default=10))
    space.add_hyperparameter(
        UniformFloatHyperparameter(
            name="precond", lower=0, upper=0.5, default=0.1))
    return space
def get_hyperparameter_search_space(dataset_properties=None):
    """Return the configuration space for this component.

    Tunable: ``criterion``, ``max_features``, ``min_samples_split``,
    ``min_samples_leaf`` and ``bootstrap`` (default ``"False"``).
    Fixed: ``n_estimators`` (100), ``max_depth``, ``max_leaf_nodes``,
    ``min_weight_fraction_leaf`` and ``min_impurity_decrease``.

    ``dataset_properties`` is accepted but not read.
    """
    hyperparameters = [
        Constant(name="n_estimators", value=100),
        CategoricalHyperparameter(
            name="criterion", choices=["gini", "entropy"],
            default_value="gini"),
        UniformFloatHyperparameter(
            name="max_features", lower=0, upper=1, default_value=0.5),
        UnParametrizedHyperparameter(name="max_depth", value="None"),
        UnParametrizedHyperparameter(name="max_leaf_nodes", value="None"),
        UniformIntegerHyperparameter(
            name="min_samples_split", lower=2, upper=20, default_value=2),
        UniformIntegerHyperparameter(
            name="min_samples_leaf", lower=1, upper=20, default_value=1),
        UnParametrizedHyperparameter(
            name='min_weight_fraction_leaf', value=0.),
        UnParametrizedHyperparameter(
            name='min_impurity_decrease', value=0.),
        CategoricalHyperparameter(
            name="bootstrap", choices=["True", "False"],
            default_value="False"),
    ]

    space = ConfigurationSpace()
    space.add_hyperparameters(hyperparameters)
    return space
def get_hyperparameter_search_space(dataset_properties=None):
    """Return the configuration space for this component.

    Tunable (all on a log scale): ``tol``, ``alpha_1``, ``alpha_2``,
    ``lambda_1``, ``lambda_2`` and ``threshold_lambda``.
    Fixed: ``n_iter`` (300) and ``fit_intercept`` (``"True"``).

    ``dataset_properties`` is accepted but not read.
    """
    cs = ConfigurationSpace()

    n_iter = UnParametrizedHyperparameter("n_iter", value=300)
    tol = UniformFloatHyperparameter(
        "tol", 10 ** -5, 10 ** -1, default_value=10 ** -3, log=True)

    # All four hyperparameters share the range 1e-10..1e-3, i.e. seven orders
    # of magnitude, so each of them is sampled on a log scale. (alpha_1 used
    # to be the only one sampled linearly, which concentrates practically all
    # samples near the upper bound.)
    priors = [
        UniformFloatHyperparameter(
            name=prior_name, log=True, lower=10 ** -10, upper=10 ** -3,
            default_value=10 ** -6)
        for prior_name in ("alpha_1", "alpha_2", "lambda_1", "lambda_2")
    ]

    threshold_lambda = UniformFloatHyperparameter(
        name="threshold_lambda", log=True, lower=10 ** 3, upper=10 ** 5,
        default_value=10 ** 4)
    fit_intercept = UnParametrizedHyperparameter("fit_intercept", "True")

    cs.add_hyperparameters(
        [n_iter, tol] + priors + [threshold_lambda, fit_intercept])
    return cs
def add_params(cs: ConfigurationSpace):
    '''
    Register the ``StandardScaler`` on/off switch on *cs*.

    The switch is a categorical over ``True``/``False`` defaulting to
    ``True``. *cs* is modified in place; nothing is returned.
    '''
    cs.add_hyperparameter(
        CategoricalHyperparameter(
            "StandardScaler", choices=[True, False], default=True))
def test_write_log_int(self):
    # A log-scaled integer in [10, 100] is expected to be written with the
    # bounds (2, 4).
    space = ConfigurationSpace()
    space.add_hyperparameter(
        UniformIntegerHyperparameter("int_log", 10, 100, log=True))

    written = irace.write(space)

    self.assertEqual("int_log '--int_log ' i (2, 4)\n", written)
def get_hyperparameter_search_space(dataset_properties=None):
    """Return the configuration space for this component.

    Tunable: ``C``, ``loss``, ``epsilon`` and ``tol`` (all floats on a log
    scale, ``loss`` categorical). Fixed: ``dual`` (``"False"``),
    ``fit_intercept`` (``"True"``) and ``intercept_scaling`` (1).

    The combination ``dual == "False"`` with ``loss == "epsilon_insensitive"``
    is registered as forbidden. ``dataset_properties`` is accepted but not
    read.
    """
    loss = CategoricalHyperparameter(
        name="loss",
        choices=["epsilon_insensitive", "squared_epsilon_insensitive"],
        default_value="squared_epsilon_insensitive")
    dual = Constant(name="dual", value="False")

    space = ConfigurationSpace()
    space.add_hyperparameters([
        UniformFloatHyperparameter(
            name="C", lower=0.03125, upper=32768, log=True,
            default_value=1.0),
        loss,
        # Random Guess
        UniformFloatHyperparameter(
            name="epsilon", lower=0.001, upper=1, default_value=0.1,
            log=True),
        dual,
        # These are set ad-hoc
        UniformFloatHyperparameter(
            name="tol", lower=1e-5, upper=1e-1, default_value=1e-4,
            log=True),
        Constant(name="fit_intercept", value="True"),
        Constant(name="intercept_scaling", value=1),
    ])

    space.add_forbidden_clause(
        ForbiddenAndConjunction(
            ForbiddenEqualsClause(dual, "False"),
            ForbiddenEqualsClause(loss, "epsilon_insensitive"),
        ))
    return space
def test_write_ordinal(self):
    # An ordinal with mixed string/int levels is written as an 'o' parameter.
    space = ConfigurationSpace()
    ordinal = OrdinalHyperparameter("ord_a", ["a", "b", 3])
    space.add_hyperparameter(ordinal)

    written = irace.write(space)

    self.assertEqual("ord_a '--ord_a ' o {a,b,3}\n", written)
def get_hyperparameter_search_space(cls, dataset_properties=None,
                                    default=None, include=None,
                                    exclude=None):
    """Build the search space that selects one of the available rescalers.

    The returned space contains a ``__choice__`` categorical over every
    available component, and each component's own search space merged in
    through ``add_component_deepcopy`` under the component's name.

    Parameters
    ----------
    dataset_properties
        Forwarded to ``cls.get_available_components`` (as ``data_prop``) and
        to every component's ``get_hyperparameter_search_space``.
    default
        Default value of ``__choice__``. When ``None``, the first available
        entry of a fixed preference list is used; it stays ``None`` if none
        of them is available.
    include, exclude
        Forwarded unchanged to ``cls.get_available_components``.

    Raises
    ------
    ValueError
        If no component is available.
    """
    # Compile a list of legal preprocessors for this problem
    components = cls.get_available_components(
        data_prop=dataset_properties, include=include, exclude=exclude)
    if len(components) == 0:
        raise ValueError(
            "No rescaling algorithm found.")

    if default is None:
        preferred = ('min/max', 'standardize', 'none', 'normalize')
        default = next(
            (candidate for candidate in preferred if candidate in components),
            None)

    space = ConfigurationSpace()
    choice = CategoricalHyperparameter(
        '__choice__', list(components.keys()), default=default)
    space.add_hyperparameter(choice)

    for component_name in components:
        component = components[component_name]
        sub_space = component.get_hyperparameter_search_space(
            dataset_properties)
        space = add_component_deepcopy(space, component_name, sub_space)

    return space
def get_hyperparameter_search_space(dataset_properties=None):
    """Return a space with a single categorical ``strategy``.

    Choices are ``"mean"``, ``"median"`` and ``"most_frequent"``; the default
    is ``"mean"``. ``dataset_properties`` is accepted but not read.
    """
    # TODO add replace by zero!
    space = ConfigurationSpace()
    space.add_hyperparameter(
        CategoricalHyperparameter(
            name="strategy",
            choices=["mean", "median", "most_frequent"],
            default_value="mean"))
    return space
def test_add_forbidden(self):
    # Checks add_forbidden twice against the same space: first with an
    # all-ones match matrix (no forbidden clause expected), then with the
    # ('pb', 'cb') cell zeroed (exactly one clause expected).
    preprocessors = ['pa', 'pb']
    classifiers = ['ca', 'cb', 'cc']
    match_matrix = numpy.ones([2, 3])

    space = ConfigurationSpace()
    space.add_hyperparameter(
        CategoricalHyperparameter(name='preprocessor',
                                  choices=preprocessors))
    space.add_hyperparameter(
        CategoricalHyperparameter(name='classifier', choices=classifiers))

    def forbid():
        # Reads match_matrix at call time, so the in-place edit below is seen.
        return autosklearn.pipeline.create_searchspace_util.add_forbidden(
            conf_space=space, node_0_list=preprocessors,
            node_1_list=classifiers, matches=match_matrix,
            node_0_name='preprocessor', node_1_name="classifier")

    result = forbid()
    self.assertEqual(len(result.forbidden_clauses), 0)
    self.assertIsInstance(result, ConfigurationSpace)

    match_matrix[1, 1] = 0
    result = forbid()
    self.assertEqual(len(result.forbidden_clauses), 1)
    clause_parts = result.forbidden_clauses[0].components
    self.assertEqual(clause_parts[0].value, 'cb')
    self.assertEqual(clause_parts[1].value, 'pb')
    self.assertIsInstance(result, ConfigurationSpace)
def test_write_new_q_float(self):
    # A float quantized with q=16 is expected to be written with a
    # "Q16_" name prefix.
    space = ConfigurationSpace()
    quantized = UniformFloatHyperparameter("float_a", 16, 1024, q=16)
    space.add_hyperparameter(quantized)

    written = pcs_new.write(space)

    self.assertEqual("Q16_float_a real [16.0, 1024.0] [520.0]", written)
def test_write_new_q_int(self):
    # An integer quantized with q=16 is expected to be written with a
    # "Q16_" name prefix.
    space = ConfigurationSpace()
    quantized = UniformIntegerHyperparameter("int_a", 16, 1024, q=16)
    space.add_hyperparameter(quantized)

    written = pcs_new.write(space)

    self.assertEqual("Q16_int_a integer [16, 1024] [520]", written)
def get_hyperparameter_search_space(dataset_properties=None):
    """Return the configuration space for this kernel-based component.

    Hyperparameters: ``kernel``, ``degree``, ``gamma``, ``coef0`` and
    ``n_components``. The ``"chi2"`` kernel is offered unless
    ``dataset_properties`` reports ``sparse is True`` or ``signed is False``.

    Conditions:
      * ``degree`` is active only for ``kernel == "poly"``;
      * ``coef0`` is active for ``"poly"`` and ``"sigmoid"``;
      * ``gamma`` is active for ``"poly"``, ``"rbf"``, ``"sigmoid"`` and,
        when offered, ``"chi2"``.
    """
    chi2_excluded = dataset_properties is not None and (
        dataset_properties.get("sparse") is True
        or dataset_properties.get("signed") is False)
    allow_chi2 = not chi2_excluded

    chi2_if_allowed = ["chi2"] if allow_chi2 else []
    possible_kernels = ['poly', 'rbf', 'sigmoid', 'cosine'] + chi2_if_allowed
    gamma_kernels = ["poly", "rbf", "sigmoid"] + chi2_if_allowed

    kernel = CategoricalHyperparameter('kernel', possible_kernels, 'rbf')
    degree = UniformIntegerHyperparameter('degree', 2, 5, 3)
    gamma = UniformFloatHyperparameter(
        name="gamma", lower=3.0517578125e-05, upper=8, log=True,
        default_value=0.1)
    coef0 = UniformFloatHyperparameter(
        name="coef0", lower=-1, upper=1, default_value=0)
    n_components = UniformIntegerHyperparameter(
        name="n_components", lower=50, upper=10000, default_value=100,
        log=True)

    space = ConfigurationSpace()
    space.add_hyperparameters([kernel, degree, gamma, coef0, n_components])
    space.add_conditions([
        EqualsCondition(degree, kernel, "poly"),
        InCondition(coef0, kernel, ["poly", "sigmoid"]),
        InCondition(gamma, kernel, gamma_kernels),
    ])
    return space
def test_write_new_log10(self):
    # A log-scaled float is expected to be written with a trailing "log".
    space = ConfigurationSpace()
    log_float = UniformFloatHyperparameter("a", 10, 1000, log=True)
    space.add_hyperparameter(log_float)

    written = pcs_new.write(space)

    self.assertEqual("a real [10.0, 1000.0] [100.0]log", written)
def test_write_forbidden(self):
    # Smoke test: builds a space with single and conjunctive forbidden
    # clauses and hands it to the irace writer. Nothing is asserted.
    space = ConfigurationSpace()

    parent = CategoricalHyperparameter("parent", [0, 1])
    int_children = [
        UniformIntegerHyperparameter(child_name, 0, 2)
        for child_name in ("child", "child2", "child3")
    ]
    cat_child = CategoricalHyperparameter("child4", [4, 5, 6, 7])
    space.add_hyperparameters([parent] + int_children + [cat_child])

    parent_is_one = ForbiddenEqualsClause(parent, 1)
    int_clauses = [
        ForbiddenInClause(child, range(2, 3)) for child in int_children
    ]
    cat_clause = ForbiddenInClause(cat_child, [6, 7])
    conjunctions = [
        ForbiddenAndConjunction(parent_is_one, clause)
        for clause in int_clauses
    ]
    space.add_forbidden_clauses(
        [parent_is_one] + int_clauses + [cat_clause] + conjunctions)

    irace.write(space)  # generates file called forbidden.txt
def get_hyperparameter_search_space(dataset_properties=None):
    """Return a space with a single categorical ``strategy``.

    Choices are ``"none"`` and ``"weighting"``; the default is ``"none"``.
    ``dataset_properties`` is accepted but not read.
    """
    # NOTE(review): this function carried a "TODO add replace by zero!" marker
    # that reads like an imputation TODO - confirm whether it applies here.
    space = ConfigurationSpace()
    space.add_hyperparameter(
        CategoricalHyperparameter(
            name="strategy",
            choices=["none", "weighting"],
            default_value="none"))
    return space
def get_hyperparameter_search_space(dataset_properties=None):
    """Return the configuration space for this component.

    Tunable: ``criterion``, ``max_features``, ``min_samples_split``,
    ``min_samples_leaf`` and ``bootstrap`` (default ``"True"``).
    Fixed: ``n_estimators`` (100), ``max_depth``,
    ``min_weight_fraction_leaf``, ``max_leaf_nodes`` and
    ``min_impurity_decrease``.

    ``dataset_properties`` is accepted but not read.
    """
    # The maximum number of features used in the forest is calculated as
    # m^max_features, where m is the total number of features, and
    # max_features is the hyperparameter specified below. The default is 0.5,
    # which yields sqrt(m) features as max_features in the estimator. This
    # corresponds with Geurts' heuristic.
    max_features = UniformFloatHyperparameter(
        name="max_features", lower=0., upper=1., default_value=0.5)

    hyperparameters = [
        Constant(name="n_estimators", value=100),
        CategoricalHyperparameter(
            name="criterion", choices=["gini", "entropy"],
            default_value="gini"),
        max_features,
        UnParametrizedHyperparameter(name="max_depth", value="None"),
        UniformIntegerHyperparameter(
            name="min_samples_split", lower=2, upper=20, default_value=2),
        UniformIntegerHyperparameter(
            name="min_samples_leaf", lower=1, upper=20, default_value=1),
        UnParametrizedHyperparameter(
            name="min_weight_fraction_leaf", value=0.),
        UnParametrizedHyperparameter(name="max_leaf_nodes", value="None"),
        CategoricalHyperparameter(
            name="bootstrap", choices=["True", "False"],
            default_value="True"),
        UnParametrizedHyperparameter(
            name='min_impurity_decrease', value=0.0),
    ]

    space = ConfigurationSpace()
    space.add_hyperparameters(hyperparameters)
    return space
def get_hyperparameter_search_space(dataset_properties=None):
    """Return the configuration space for this component.

    Tunable: ``learning_rate`` (log scale), ``n_estimators``, ``max_depth``,
    ``criterion``, ``min_samples_split``, ``min_samples_leaf``, ``subsample``
    and ``max_features``. Fixed: ``loss`` (``"deviance"``),
    ``min_weight_fraction_leaf``, ``max_leaf_nodes`` and
    ``min_impurity_decrease``.

    ``dataset_properties`` is accepted but not read.
    """
    hyperparameters = [
        Constant(name="loss", value="deviance"),
        UniformFloatHyperparameter(
            name="learning_rate", lower=0.01, upper=1, default_value=0.1,
            log=True),
        UniformIntegerHyperparameter(
            name="n_estimators", lower=50, upper=500, default_value=100),
        UniformIntegerHyperparameter(
            name="max_depth", lower=1, upper=10, default_value=3),
        CategoricalHyperparameter(
            name='criterion', choices=['friedman_mse', 'mse', 'mae'],
            default_value='mse'),
        UniformIntegerHyperparameter(
            name="min_samples_split", lower=2, upper=20, default_value=2),
        UniformIntegerHyperparameter(
            name="min_samples_leaf", lower=1, upper=20, default_value=1),
        UnParametrizedHyperparameter(
            name="min_weight_fraction_leaf", value=0.),
        UniformFloatHyperparameter(
            name="subsample", lower=0.01, upper=1.0, default_value=1.0),
        UniformFloatHyperparameter(
            name="max_features", lower=0.1, upper=1.0, default_value=1),
        UnParametrizedHyperparameter(name="max_leaf_nodes", value="None"),
        UnParametrizedHyperparameter(
            name='min_impurity_decrease', value=0.0),
    ]

    space = ConfigurationSpace()
    space.add_hyperparameters(hyperparameters)
    return space
def test_write_float(self):
    # A plain float hyperparameter is written as an 'r' parameter with
    # six-decimal bounds.
    space = ConfigurationSpace()
    plain_float = UniformFloatHyperparameter("float_a", 16, 1024)
    space.add_hyperparameter(plain_float)

    written = irace.write(space)

    self.assertEqual(
        "float_a '--float_a ' r (16.000000, 1024.000000)\n", written)
def test_build_new_GreaterThanIntCondition(self):
    # Writes "b | a > threshold" once with a float parent and once with an
    # integer parent; the child is the same integer hyperparameter each time.
    cases = (
        (
            UniformFloatHyperparameter("a", 0, 1, 0.5),
            0.5,
            "a real [0.0, 1.0] [0.5]\n"
            "b integer [0, 10] [5]\n\n"
            "b | a > 0.5",
        ),
        (
            UniformIntegerHyperparameter("a", 0, 10, 5),
            5,
            "a integer [0, 10] [5]\n"
            "b integer [0, 10] [5]\n\n"
            "b | a > 5",
        ),
    )

    for parent, threshold, expected in cases:
        child = UniformIntegerHyperparameter("b", 0, 10, 5)
        space = ConfigurationSpace()
        space.add_hyperparameter(parent)
        space.add_hyperparameter(child)
        space.add_condition(GreaterThanCondition(child, parent, threshold))

        written = pcs_new.write(space)
        self.assertEqual(expected, written)
def test_write_categorical(self):
    # A categorical hyperparameter is written as a 'c' parameter.
    space = ConfigurationSpace()
    categorical = CategoricalHyperparameter("cat_a", ["a", "b", "c"])
    space.add_hyperparameter(categorical)

    written = irace.write(space)

    self.assertEqual("cat_a '--cat_a ' c {a,b,c}\n", written)
def get_hyperparameter_search_space(dataset_properties=None):
    """Return a space with log-scaled ``gamma`` and ``n_components``.

    ``gamma`` is a float in [3.0517578125e-05, 8] (default 1.0) and
    ``n_components`` an integer in [50, 10000] (default 100).
    ``dataset_properties`` is accepted but not read.
    """
    space = ConfigurationSpace()
    space.add_hyperparameters([
        UniformFloatHyperparameter(
            name="gamma", lower=3.0517578125e-05, upper=8,
            default_value=1.0, log=True),
        UniformIntegerHyperparameter(
            name="n_components", lower=50, upper=10000, default_value=100,
            log=True),
    ])
    return space
def test_write_log_float(self):
    # A log-scaled float in [e^2, e^5] is expected to be written with the
    # bounds (2, 5).
    import numpy as np

    space = ConfigurationSpace()
    space.add_hyperparameter(
        UniformFloatHyperparameter(
            "float_log", np.exp(2), np.exp(5), log=True))

    written = irace.write(space)

    self.assertEqual(
        "float_log '--float_log ' r (2.000000, 5.000000)\n", written)
def get_hyperparameter_search_space(dataset_properties=None):
    """Return a space with ``use_minimum_fraction`` and ``minimum_fraction``.

    ``minimum_fraction`` (log-scaled float in [0.0001, 0.5], default 0.01) is
    only active while ``use_minimum_fraction`` equals ``'True'``.
    ``dataset_properties`` is accepted but not read.
    """
    space = ConfigurationSpace()

    # The objects returned by add_hyperparameter are the ones wired into the
    # condition below.
    switch = space.add_hyperparameter(
        CategoricalHyperparameter(
            name="use_minimum_fraction", choices=["True", "False"],
            default="True"))
    fraction = space.add_hyperparameter(
        UniformFloatHyperparameter(
            name="minimum_fraction", lower=.0001, upper=0.5, default=0.01,
            log=True))

    space.add_condition(EqualsCondition(fraction, switch, 'True'))
    return space
def get_hyperparameter_search_space(dataset_properties=None):
    """Return a space with ``alpha``, ``fit_intercept`` and ``tol``.

    ``alpha`` (default 1.0) and ``tol`` (default 1e-3) are log-scaled floats;
    ``fit_intercept`` is fixed to ``"True"``. ``dataset_properties`` is
    accepted but not read.
    """
    space = ConfigurationSpace()
    space.add_hyperparameters([
        UniformFloatHyperparameter(
            name="alpha", lower=10 ** -5, upper=10., log=True,
            default_value=1.),
        UnParametrizedHyperparameter(name="fit_intercept", value="True"),
        UniformFloatHyperparameter(
            name="tol", lower=1e-5, upper=1e-1, default_value=1e-3,
            log=True),
    ])
    return space
def get_hyperparameter_search_space(dataset_properties=None):
    """Return a space with ``gamma`` and ``n_components``.

    ``gamma`` is a float in [0.3, 2.0] (default 1.0); ``n_components`` is a
    log-scaled integer in [50, 10000] (default 100). ``dataset_properties``
    is accepted but not read.
    """
    space = ConfigurationSpace()
    space.add_hyperparameter(
        UniformFloatHyperparameter(
            name="gamma", lower=0.3, upper=2., default=1.0))
    space.add_hyperparameter(
        UniformIntegerHyperparameter(
            name="n_components", lower=50, upper=10000, default=100,
            log=True))
    return space
def get_hyperparameter_search_space(dataset_properties=None):
    """Return a space with ``alpha``, ``fit_intercept`` and ``tol``.

    ``alpha`` (default 1.0) and ``tol`` (default 1e-4) are log-scaled floats;
    ``fit_intercept`` is fixed to ``"True"``. ``dataset_properties`` is
    accepted but not read.
    """
    hyperparameters = (
        UniformFloatHyperparameter(
            name="alpha", lower=10 ** -5, upper=10., log=True, default=1.),
        UnParametrizedHyperparameter(name="fit_intercept", value="True"),
        UniformFloatHyperparameter(
            name="tol", lower=1e-5, upper=1e-1, default=1e-4, log=True),
    )

    space = ConfigurationSpace()
    for hyperparameter in hyperparameters:
        space.add_hyperparameter(hyperparameter)
    return space
def _construct_lt_condition(
        condition: Dict,
        cs: ConfigurationSpace,
) -> LessThanCondition:
    """Build a ``LessThanCondition`` from its dictionary description.

    ``condition`` must provide the keys ``'child'`` and ``'parent'`` (names
    looked up in ``cs``) and ``'value'`` (passed through as the threshold).
    """
    lookup = cs.get_hyperparameter
    child_hp = lookup(condition['child'])
    parent_hp = lookup(condition['parent'])
    threshold = condition['value']
    return LessThanCondition(
        child=child_hp, parent=parent_hp, value=threshold)
def read(pcs_string, debug=False):
    """Parse PCS-formatted lines into a ``ConfigurationSpace``.

    Each line is stripped of comments (everything after ``#``) and quotes and
    is then classified as

    * a condition (contains ``|``), parsed with ``pp_condition``;
    * a forbidden clause (starts with ``{`` and ends with ``}``);
    * a parameter definition (``integer``/``real``, ``categorical`` or
      ``ordinal``);
    * or skipped (no ``}``/``]`` in it, or empty).

    Parameters are added first, then forbidden clauses, then conditions.

    Parameters
    ----------
    pcs_string
        Iterable yielding the lines of the PCS description.
    debug
        Not read by this function.

    Returns
    -------
    ConfigurationSpace
        The populated configuration space.

    Raises
    ------
    NotImplementedError
        If a condition or parameter line cannot be parsed, or a forbidden
        clause uses an operator other than ``=``.
    """
    configuration_space = ConfigurationSpace()
    conditions = []
    forbidden = []

    # Constructor per parameter kind; it never changes, so build it once.
    create = {
        "int": UniformIntegerHyperparameter,
        "float": UniformFloatHyperparameter,
        "categorical": CategoricalHyperparameter,
        "ordinal": OrdinalHyperparameter
    }

    for line in pcs_string:
        if "#" in line:
            # It contains a comment
            pos = line.find("#")
            line = line[:pos]

        # Remove quotes and whitespaces at beginning and end
        line = line.replace('"', "").replace("'", "")
        line = line.strip()
        if "|" in line:
            # It's a condition
            try:
                c = pp_condition.parseString(line)
                conditions.append(c)
            except pyparsing.ParseException:
                raise NotImplementedError(
                    "Could not parse condition: %s" % line)
            continue
        if "}" not in line and "]" not in line:
            continue
        if line.startswith("{") and line.endswith("}"):
            forbidden.append(line)
            continue
        if len(line.strip()) == 0:
            continue

        param = None

        # First attempt: continuous (integer / real) parameter.
        try:
            param_list = pp_cont_param.parseString(line)
            name = param_list[0]
            if param_list[1] == 'integer':
                paramtype = 'int'
            elif param_list[1] == 'real':
                paramtype = 'float'
            else:
                paramtype = None

            if paramtype in ['int', 'float']:
                # Anything after the first ten tokens is the optional
                # "log" marker.
                log = param_list[10:]
                param_list = param_list[:10]
                if len(log) > 0:
                    log = log[0]
                lower = float(param_list[3])
                upper = float(param_list[5])
                log_on = True if "log" in log else False
                default = float(param_list[8])
                param = create[paramtype](name=name, lower=lower,
                                          upper=upper, q=None, log=log_on,
                                          default=default)
        except pyparsing.ParseException:
            # Not a continuous parameter; fall through to the next attempt.
            pass

        # Second attempt: categorical / ordinal parameter.
        try:
            if "categorical" in line:
                param_list = pp_cat_param.parseString(line)
                name = param_list[0]
                choices = [choice for choice in param_list[3:-4:2]]
                default = param_list[-2]
                param = create["categorical"](name=name, choices=choices,
                                              default=default)
            elif "ordinal" in line:
                param_list = pp_ord_param.parseString(line)
                name = param_list[0]
                sequence = [seq for seq in param_list[3:-4:2]]
                default = param_list[-2]
                param = create["ordinal"](name=name, sequence=sequence,
                                          default=default)
        except pyparsing.ParseException:
            # Handled by the ``param is None`` check below.
            pass

        if param is None:
            raise NotImplementedError("Could not parse: %s" % line)

        configuration_space.add_hyperparameter(param)

    for clause in forbidden:
        param_list = pp_forbidden_clause.parseString(clause)
        tmp_list = []
        clause_list = []
        # Tokens arrive as name, operator, value, separator, ...; every
        # fourth token (the separator or the closing brace) flushes the
        # triple collected so far.
        for value in param_list[1:]:
            if len(tmp_list) < 3:
                tmp_list.append(value)
            else:
                # So far, only equals is supported by SMAC
                if tmp_list[1] == '=':
                    # TODO maybe add a check if the hyperparameter is
                    # actually in the configuration space
                    clause_list.append(
                        ForbiddenEqualsClause(
                            configuration_space.get_hyperparameter(
                                tmp_list[0]),
                            tmp_list[2]))
                else:
                    raise NotImplementedError()
                tmp_list = []
        configuration_space.add_forbidden_clause(
            ForbiddenAndConjunction(*clause_list))

    conditions_per_child = OrderedDict()
    for condition in conditions:
        child_name = condition[0]
        if child_name not in conditions_per_child:
            conditions_per_child[child_name] = list()
        conditions_per_child[child_name].append(condition)

    for child_name in conditions_per_child:
        for condition in conditions_per_child[child_name]:
            # Drop the leading "<child> |" tokens and work on the text.
            condition = condition[2:]
            condition = ' '.join(condition)
            if '||' in str(condition):
                ors = []
                # 1st case we have a mixture of || and &&
                if '&&' in str(condition):
                    ors_combis = []
                    for cond_parts in str(condition).split('||'):
                        condition = str(cond_parts).split('&&')
                        # if length is 1 it must be or
                        if len(condition) == 1:
                            element_list = condition[0].split()
                            ors_combis.append(
                                condition_specification(
                                    child_name, element_list,
                                    configuration_space))
                        else:
                            # now taking care of ands
                            ands = []
                            for and_part in condition:
                                # Exactly the tokens of this and-part; they
                                # must not be repeated once per and-part.
                                element_list = and_part.split()
                                ands.append(
                                    condition_specification(
                                        child_name, element_list,
                                        configuration_space))
                            ors_combis.append(AndConjunction(*ands))
                    mixed_conjunction = OrConjunction(*ors_combis)
                    configuration_space.add_condition(mixed_conjunction)
                else:
                    # 2nd case: we only have ors
                    for cond_parts in str(condition).split('||'):
                        element_list = cond_parts.split()
                        ors.append(
                            condition_specification(child_name, element_list,
                                                    configuration_space))
                    or_conjunction = OrConjunction(*ors)
                    configuration_space.add_condition(or_conjunction)
            else:
                # 3rd case: we only have ands
                if '&&' in str(condition):
                    ands = []
                    for cond_parts in str(condition).split('&&'):
                        element_list = cond_parts.split()
                        ands.append(
                            condition_specification(child_name, element_list,
                                                    configuration_space))
                    and_conjunction = AndConjunction(*ands)
                    configuration_space.add_condition(and_conjunction)
                else:
                    # 4th case: we have a normal condition
                    element_list = condition.split()
                    normal_condition = condition_specification(
                        child_name, element_list, configuration_space)
                    configuration_space.add_condition(normal_condition)

    return configuration_space
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
    """Return the search space in the format of the requested optimizer.

    * ``optimizer == 'smac'``: a ``ConfigurationSpace``.
    * ``optimizer == 'tpe'``: a dict of hyperopt expressions keyed by
      hyperparameter name (labels are prefixed with ``lgb_``).
    * anything else: ``None``.

    Both formats cover ``n_estimators`` (100..1000), ``num_leaves``
    (31..1023), ``learning_rate``, ``min_child_weight`` (1..10),
    ``subsample``, ``colsample_bytree``, ``reg_alpha`` and ``reg_lambda``.
    ``dataset_properties`` is accepted but not read.
    """
    if optimizer == 'smac':
        space = ConfigurationSpace()
        space.add_hyperparameters([
            UniformIntegerHyperparameter(
                name="n_estimators", lower=100, upper=1000,
                default_value=500),
            UniformIntegerHyperparameter(
                name="num_leaves", lower=31, upper=1023, default_value=31),
            UniformFloatHyperparameter(
                name="learning_rate", lower=0.025, upper=0.3,
                default_value=0.1, log=True),
            UniformIntegerHyperparameter(
                name="min_child_weight", lower=1, upper=10, default_value=1),
            UniformFloatHyperparameter(
                name="subsample", lower=0.5, upper=1, default_value=1),
            UniformFloatHyperparameter(
                name="colsample_bytree", lower=0.5, upper=1,
                default_value=1),
            UniformFloatHyperparameter(
                name='reg_alpha', lower=1e-10, upper=10, log=True,
                default_value=1e-10),
            UniformFloatHyperparameter(
                name="reg_lambda", lower=1e-10, upper=10, log=True,
                default_value=1e-10),
        ])
        return space

    if optimizer == 'tpe':
        from hyperopt import hp

        # hp.randint draws from [0, upper), so each integer range is shifted
        # by its lower bound.
        return {
            'n_estimators': hp.randint('lgb_n_estimators', 901) + 100,
            'num_leaves': hp.randint('lgb_num_leaves', 993) + 31,
            'learning_rate': hp.loguniform(
                'lgb_learning_rate', np.log(0.025), np.log(0.3)),
            'min_child_weight': hp.randint('lgb_min_child_weight', 10) + 1,
            'subsample': hp.uniform('lgb_subsample', 0.5, 1),
            'colsample_bytree': hp.uniform('lgb_colsample_bytree', 0.5, 1),
            'reg_alpha': hp.loguniform(
                'lgb_reg_alpha', np.log(1e-10), np.log(10)),
            'reg_lambda': hp.loguniform(
                'lgb_reg_lambda', np.log(1e-10), np.log(10)),
        }

    return None
def _get_base_search_space(
    self,
    cs: ConfigurationSpace,
    dataset_properties: Dict[str, BaseDatasetPropertiesType],
    include: Optional[Dict[str, Any]],
    exclude: Optional[Dict[str, Any]],
    pipeline: List[Tuple[str, PipelineStepType]]
) -> ConfigurationSpace:
    """Fill ``cs`` with the search space of every step in ``pipeline``.

    Args:
        cs: Configuration space that receives one sub-space per step.
        dataset_properties: Forwarded to ``get_match_array``,
            ``find_active_choices``, each step's
            ``get_hyperparameter_search_space`` and ``add_forbidden``.
        include: Per-step selection keyed by step name; ``self.include`` is
            used when ``None``.
        exclude: Per-step exclusion keyed by step name; ``self.exclude`` is
            used when ``None``.
        pipeline: ``(name, step)`` pairs.

    Returns:
        The configuration space, with forbidden clauses added when not every
        combination in the match array is legal.

    Raises:
        ValueError: If ``include`` or ``exclude`` contains a key that is not
            a step name.
    """
    if include is None:
        include = self.include

    step_names = [step[0] for step in pipeline]
    for key in include:
        if key not in step_names:
            raise ValueError('Invalid key in include: %s; should be one '
                             'of %s' % (key, step_names))

    if exclude is None:
        exclude = self.exclude

    for key in exclude:
        if key not in step_names:
            raise ValueError('Invalid key in exclude: %s; should be one '
                             'of %s' % (key, step_names))

    if self.search_space_updates is not None:
        self._check_search_space_updates(include=include,
                                         exclude=exclude)
        self.search_space_updates.apply(pipeline=pipeline)

    matches = get_match_array(
        pipeline, dataset_properties, include=include, exclude=exclude)

    # Now we have only legal combinations at this step of the pipeline
    # Simple sanity checks
    assert np.sum(matches) != 0, "No valid pipeline found."

    assert np.sum(matches) <= np.size(matches), \
        "'matches' is not binary; %s <= %d, %s" % \
        (str(np.sum(matches)), np.size(matches), str(matches.shape))

    # Iterate each dimension of the matches array (each step of the
    # pipeline) to see if we can add a hyperparameter for that step
    for node_idx, (node_name, node) in enumerate(pipeline):
        if not isinstance(node, autoPyTorchChoice):
            # The node isn't a choice, so it can be added immediately: it
            # must be active (if it wasn't, np.sum(matches) would be zero).
            node_space = node.get_hyperparameter_search_space(
                dataset_properties,  # type: ignore[call-arg]
                **node._get_search_space_updates())
            cs.add_configuration_space(node_name, node_space)
            continue

        # The node is a choice, so first figure out which of its choices
        # are actually legal.
        active_choices = find_active_choices(
            matches, node, node_idx,
            dataset_properties,
            include.get(node_name),
            exclude.get(node_name)
        )
        # ignore type check here as mypy is not able to infer
        # that isinstance(node, autoPyTorchChoice) = True
        choice_space = node.get_hyperparameter_search_space(  # type: ignore[call-arg]
            dataset_properties, include=active_choices)
        cs.add_configuration_space(node_name, choice_space)

    # And now add forbidden parameter configurations
    # According to matches
    if np.sum(matches) < np.size(matches):
        cs = add_forbidden(
            conf_space=cs, pipeline=pipeline, matches=matches,
            dataset_properties=dataset_properties, include=include,
            exclude=exclude)

    return cs
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
    """Return the search space in the format of the requested optimizer.

    * ``optimizer == 'smac'``: a ``ConfigurationSpace``.
    * ``optimizer == 'tpe'``: a dict of hyperopt expressions keyed by
      hyperparameter name.
    * anything else: ``None``.

    Both formats cover ``n_estimators`` (50..500), ``sampling_strategy``,
    ``replacement``, ``ab_n_estimators`` (50..500), ``ab_learning_rate``,
    ``ab_algorithm`` and ``ab_max_depth`` (1..10). ``dataset_properties`` is
    accepted but not read.
    """
    if optimizer == 'smac':
        space = ConfigurationSpace()
        space.add_hyperparameters([
            UniformIntegerHyperparameter(
                name="n_estimators", lower=50, upper=500, default_value=50,
                log=False),
            CategoricalHyperparameter(
                name="sampling_strategy",
                choices=["majority", "not minority", "not majority", "all"],
                default_value="not minority"),
            CategoricalHyperparameter(
                name="replacement", choices=["True", "False"],
                default_value="False"),
            UniformIntegerHyperparameter(
                name="ab_n_estimators", lower=50, upper=500,
                default_value=50, log=False),
            UniformFloatHyperparameter(
                name="ab_learning_rate", lower=0.01, upper=2,
                default_value=0.1, log=True),
            CategoricalHyperparameter(
                name="ab_algorithm", choices=["SAMME.R", "SAMME"],
                default_value="SAMME.R"),
            UniformIntegerHyperparameter(
                name="ab_max_depth", lower=1, upper=10, default_value=1,
                log=False),
        ])
        return space

    if optimizer == 'tpe':
        from hyperopt import hp

        # hp.randint draws from [0, upper), so each integer range is shifted
        # by its lower bound.
        return {
            'n_estimators':
                hp.randint('easy_ensemble_n_estimators', 451) + 50,
            'sampling_strategy': hp.choice(
                'easy_ensemble_sampling_strategy',
                ["majority", "not minority", "not majority", "all"]),
            'replacement': hp.choice(
                'easy_ensemble_replacement', ["True", "False"]),
            'ab_n_estimators': hp.randint('ab_n_estimators', 451) + 50,
            'ab_learning_rate': hp.loguniform(
                'ab_learning_rate', np.log(0.01), np.log(2)),
            'ab_algorithm': hp.choice('ab_algorithm', ["SAMME.R", "SAMME"]),
            'ab_max_depth': hp.randint('ab_max_depth', 10) + 1,
        }

    return None
def get_hyperparameter_search_space(dataset_properties=None):
    """Return the configuration space for this component.

    Tunable: ``learning_rate`` (log scale), ``n_estimators``, ``max_depth``,
    ``min_samples_split``, ``min_samples_leaf``, ``subsample`` and
    ``max_features`` (0.5..5, default 1). Fixed: ``loss`` (``"deviance"``),
    ``min_weight_fraction_leaf`` and ``max_leaf_nodes``.

    ``dataset_properties`` is accepted but not read.
    """
    hyperparameters = (
        Constant(name="loss", value="deviance"),
        UniformFloatHyperparameter(
            name="learning_rate", lower=0.01, upper=1, default=0.1,
            log=True),
        UniformIntegerHyperparameter(
            name="n_estimators", lower=50, upper=500, default=100),
        UniformIntegerHyperparameter(
            name="max_depth", lower=1, upper=10, default=3),
        UniformIntegerHyperparameter(
            name="min_samples_split", lower=2, upper=20, default=2,
            log=False),
        UniformIntegerHyperparameter(
            name="min_samples_leaf", lower=1, upper=20, default=1,
            log=False),
        UnParametrizedHyperparameter(
            name="min_weight_fraction_leaf", value=0.),
        UniformFloatHyperparameter(
            name="subsample", lower=0.01, upper=1.0, default=1.0,
            log=False),
        UniformFloatHyperparameter(
            name="max_features", lower=0.5, upper=5, default=1),
        UnParametrizedHyperparameter(name="max_leaf_nodes", value="None"),
    )

    space = ConfigurationSpace()
    for hyperparameter in hyperparameters:
        space.add_hyperparameter(hyperparameter)
    return space
def get_hyperparameter_search_space(dataset_properties=None):
    """Return a space with a single float ``reg_param`` in [0.0, 1.0].

    The default is 0.0. ``dataset_properties`` is accepted but not read.
    """
    space = ConfigurationSpace()
    space.add_hyperparameter(
        UniformFloatHyperparameter(
            name='reg_param', lower=0.0, upper=1.0, default_value=0.0))
    return space
def create_hyperspace(regressor_id):
    """Return the ConfigurationSpace for the regressor named ``regressor_id``.

    Recognised identifiers are ``'knn'``, ``'liblinear_svr'``,
    ``'random_forest'``, ``'lightgbm'`` and any string containing
    ``'catboost'``. A catboost identifier containing ``'cpu'`` or ``'gpu'``
    selects the matching parameter set; one containing neither receives no
    catboost parameters. Every returned space also carries an unparametrized
    ``estimator`` entry holding ``regressor_id``.

    Raises
    ------
    ValueError
        If ``regressor_id`` matches none of the identifiers above.
    """
    if regressor_id == 'knn':
        from autosklearn.pipeline.components.regression.k_nearest_neighbors import KNearestNeighborsRegressor
        space = KNearestNeighborsRegressor.get_hyperparameter_search_space()
    elif regressor_id == 'liblinear_svr':
        from autosklearn.pipeline.components.regression.liblinear_svr import LibLinear_SVR
        space = LibLinear_SVR.get_hyperparameter_search_space()
    elif regressor_id == 'random_forest':
        space = ConfigurationSpace()
        space.add_hyperparameters([
            UniformIntegerHyperparameter("n_estimators", 100, 500, default_value=200),
            CategoricalHyperparameter("criterion", ['mse', 'friedman_mse', 'mae']),
            UniformFloatHyperparameter("max_features", 0.1, 1.0, default_value=1.0),
            UnParametrizedHyperparameter("max_depth", "None"),
            UniformIntegerHyperparameter("min_samples_split", 2, 20, default_value=2),
            UniformIntegerHyperparameter("min_samples_leaf", 1, 20, default_value=1),
            UnParametrizedHyperparameter("min_weight_fraction_leaf", 0.),
            UnParametrizedHyperparameter("max_leaf_nodes", "None"),
            UnParametrizedHyperparameter('min_impurity_decrease', 0.0),
            CategoricalHyperparameter("bootstrap", ["True", "False"], default_value="True"),
        ])
    elif regressor_id == 'lightgbm':
        space = ConfigurationSpace()
        space.add_hyperparameters([
            UniformIntegerHyperparameter("n_estimators", 100, 1000, default_value=500),
            UniformIntegerHyperparameter("num_leaves", 31, 1023, default_value=31),
            UniformFloatHyperparameter("learning_rate", 0.025, 0.3, default_value=0.1, log=True),
            UniformIntegerHyperparameter("min_child_weight", 1, 10, default_value=1),
            UniformFloatHyperparameter("subsample", 0.5, 1, default_value=1),
            UniformFloatHyperparameter("colsample_bytree", 0.5, 1, default_value=1),
            UniformFloatHyperparameter('reg_alpha', 1e-10, 10, log=True, default_value=1e-10),
            UniformFloatHyperparameter("reg_lambda", 1e-10, 10, log=True, default_value=1e-10),
        ])
    elif 'catboost' in regressor_id:
        space = ConfigurationSpace()
        # Parameters shared by the CPU and GPU variants.
        depth = UniformIntegerHyperparameter("max_depth", 4, 12, default_value=6)
        rate = UniformFloatHyperparameter("learning_rate", 0.01, 0.3, default_value=0.1, log=True)
        row_fraction = UniformFloatHyperparameter("subsample", 0.5, 1, default_value=1)
        l2 = UniformFloatHyperparameter("reg_lambda", 1e-10, 10, log=True, default_value=1e-10)
        objective = CategoricalHyperparameter("loss_function", ['RMSE', 'MAE'], default_value='RMSE')
        if 'cpu' in regressor_id:
            trees = UniformIntegerHyperparameter("n_estimators", 100, 1000, default_value=500)
            col_fraction = UniformFloatHyperparameter("colsample_bylevel", 0.5, 1, default_value=1)
            space.add_hyperparameters(
                [trees, depth, rate, row_fraction, col_fraction, l2, objective])
        elif 'gpu' in regressor_id:
            trees = UniformIntegerHyperparameter("n_estimators", 1000, 10000, default_value=1000)
            leaf_samples = UniformIntegerHyperparameter("min_child_samples", 1, 15, default_value=1)
            space.add_hyperparameters(
                [trees, depth, rate, row_fraction, leaf_samples, l2, objective])
    # ---ADD THE HYPERSPACE FOR YOUR REGRESSOR---------------
    else:
        raise ValueError('Undefined regressor identifier: %s!' % regressor_id)

    # Tag the space with the estimator it belongs to.
    space.add_hyperparameter(UnParametrizedHyperparameter("estimator", regressor_id))
    return space
def get_hyperparameter_search_space(dataset_properties=None):
    """Return an empty ConfigurationSpace (nothing to tune).

    ``dataset_properties`` is accepted for interface compatibility and is
    not read.
    """
    return ConfigurationSpace()
def get_hyperparameter_search_space(dataset_properties=None):
    """Return the space of tree-ensemble size and leaf/split parameters.

    ``dataset_properties`` is accepted for interface compatibility and is
    not read. ``min_weight_fraction_leaf`` and ``max_leaf_nodes`` are fixed
    values rather than searched ranges.
    """
    hyperparameters = [
        UniformIntegerHyperparameter(
            name="n_estimators", lower=10, upper=100, default=10),
        UniformIntegerHyperparameter(
            name="max_depth", lower=2, upper=10, default=5),
        UniformIntegerHyperparameter(
            name="min_samples_split", lower=2, upper=20, default=2),
        UniformIntegerHyperparameter(
            name="min_samples_leaf", lower=1, upper=20, default=1),
        Constant('min_weight_fraction_leaf', 1.0),
        UnParametrizedHyperparameter(name="max_leaf_nodes", value="None"),
    ]
    space = ConfigurationSpace()
    for hyperparameter in hyperparameters:
        space.add_hyperparameter(hyperparameter)
    return space
def get_hyperparameter_search_space(self, dataset_properties=None):
    """Store ``dataset_properties`` and build the combined search space.

    The space is produced by handing a fresh ``ConfigurationSpace`` and
    ``self._transformers`` to
    ``DataPreprocessor._get_hyperparameter_search_space_recursevely``.
    """
    self.dataset_properties = dataset_properties
    empty_space = ConfigurationSpace()
    return DataPreprocessor._get_hyperparameter_search_space_recursevely(
        dataset_properties, empty_space, self._transformers)
def get_pred_surface(self, rh, X_scaled, conf_list: list, contour_step_size):
    """Fit an EPM on the scaled 2-dim input and return contour-plot data.

    Parameters
    ----------
    rh: RunHistory
        runhistory
    X_scaled: np.array
        configurations in scaled 2dim; row ``i`` belongs to ``conf_list[i]``
    conf_list: list
        list of Configuration objects. NOTE: modified in place -- every
        entry gets a forbidden-free configuration space assigned and is
        replaced by its imputed version.
    contour_step_size: float
        step-size for contour

    Returns
    -------
    contour_data: (np.array, np.array, np.array)
        x, y, Z for contour plots
    """
    # use PCA to reduce features to also at most 2 dims
    scen = copy.deepcopy(self.scenario)  # pca changes feats
    if scen.feature_array.shape[1] > 2:
        self.logger.debug("Use PCA to reduce features to from %d dim to 2 dim",
                          scen.feature_array.shape[1])
        # perform PCA
        insts = scen.feature_dict.keys()
        feature_array = np.array([scen.feature_dict[i] for i in insts])
        feature_array = StandardScaler().fit_transform(feature_array)
        feature_array = PCA(n_components=2).fit_transform(feature_array)
        # inject in scenario-object
        scen.feature_array = feature_array
        scen.feature_dict = {inst: feature_array[idx, :]
                             for idx, inst in enumerate(insts)}
        scen.n_features = 2

    # convert the data to train EPM on 2-dim featurespace (for contour-data)
    self.logger.debug("Convert data for epm.")
    # The types returned by the conversion are discarded: the forest is
    # trained on 2 config dims + the feature dims, all typed 0.
    X, y, _ = convert_data_for_epm(scenario=scen,
                                   runhistory=rh,
                                   impute_inactive_parameters=True,
                                   logger=self.logger)
    types = np.array(np.zeros((2 + scen.feature_array.shape[1])), dtype=np.uint)
    num_params = len(scen.cs.get_hyperparameters())

    # impute missing values in configs and insert MDS'ed (2dim) configs to the right positions
    conf_dict = {}
    # Remove forbidden clauses (this is necessary to enable the impute_inactive_values-method, see #226)
    cs_no_forbidden = copy.deepcopy(conf_list[0].configuration_space)
    cs_no_forbidden.forbidden_clauses = []
    for idx, c in enumerate(conf_list):
        c.configuration_space = cs_no_forbidden
        conf_list[idx] = impute_inactive_values(c)
        conf_dict[str(conf_list[idx].get_array())] = X_scaled[idx, :]

    # Debug compare elements:
    c1, c2 = {str(z) for z in X}, {str(z) for z in conf_dict.keys()}
    # Exactly one argument per placeholder; a surplus argument would be
    # silently ignored by str.format and shift the reported lengths.
    self.logger.debug(
        "{} elements not in both sets, {} elements in both sets, X (len {}) and conf_dict (len {}) "
        "(might be a problem related to forbidden clauses?)".format(
            len(c1 ^ c2), len(c1 & c2), len(c1), len(c2)))
    # self.logger.debug("Elements: {}".format(str(c1 ^ c2)))

    # X_trans is the same as X but with reduced 2-dim features
    # (so shape is (N, 2) instead of (N, M))
    X_trans = []
    for x in X:
        x_scaled_conf = conf_dict[str(x[:num_params])]
        # append scaled config + pca'ed features (total of 4 values) per config/feature-sample
        X_trans.append(np.concatenate((x_scaled_conf, x[num_params:]), axis=0))
    X_trans = np.array(X_trans)

    self.logger.debug(
        "Train random forest for contour-plot. Shape of X: {}, shape of X_trans: {}"
        .format(X.shape, X_trans.shape))
    self.logger.debug("Faking configspace to be able to train rf...")
    # We need to fake config-space bypass imputation of inactive values in random forest implementation
    fake_cs = ConfigurationSpace(name="fake-cs-for-configurator-footprint")

    # We need to add fake hyperparameters. Always assume there are only two dimensions
    fake_cs.add_hyperparameters([
        UniformFloatHyperparameter('fake-%d' % i, lower=0., upper=100000.,
                                   default_value=0., log=False)
        for i in range(2)
    ])

    bounds = np.array([(0, np.nan), (0, np.nan)], dtype=object)
    model = RandomForestWithInstances(fake_cs,
                                      types,
                                      bounds,
                                      seed=self.rng.randint(MAXINT),
                                      instance_features=np.array(scen.feature_array),
                                      ratio_features=1.0)

    start = time.time()
    model.train(X_trans, y)
    self.logger.debug("Fitting random forest took %f time", time.time() - start)

    # Grid over the scaled configurations, padded by 1 on every side.
    x_min, x_max = X_scaled[:, 0].min() - 1, X_scaled[:, 0].max() + 1
    y_min, y_max = X_scaled[:, 1].min() - 1, X_scaled[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, contour_step_size),
                         np.arange(y_min, y_max, contour_step_size))
    grid_points = np.c_[xx.ravel(), yy.ravel()]

    self.logger.debug("x_min: %f, x_max: %f, y_min: %f, y_max: %f",
                      x_min, x_max, y_min, y_max)
    self.logger.debug("Predict on %d samples in grid to get surface (step-size: %f)",
                      grid_points.shape[0], contour_step_size)

    start = time.time()
    Z, _ = model.predict_marginalized_over_instances(grid_points)
    Z = Z.reshape(xx.shape)
    self.logger.debug("Predicting random forest took %f time", time.time() - start)

    return xx, yy, Z
def read(pcs_string, debug=False):
    """
    Read in a :py:class:`~ConfigSpace.configuration_space.ConfigurationSpace`
    definition from a pcs file.

    Example
    -------

    .. testsetup:: pcs_test

        from ConfigSpace import ConfigurationSpace
        import ConfigSpace.hyperparameters as CSH
        from ConfigSpace.read_and_write import pcs
        cs = ConfigurationSpace()
        cs.add_hyperparameter(CSH.CategoricalHyperparameter('a', choices=[1, 2, 3]))
        with open('configspace.pcs', 'w') as f:
             f.write(pcs.write(cs))

    .. doctest:: pcs_test

        >>> from ConfigSpace.read_and_write import pcs
        >>> with open('configspace.pcs', 'r') as fh:
        ...     deserialized_conf = pcs.read(fh)

    Parameters
    ----------
    pcs_string : iterable of str
        ConfigSpace definition in pcs format. The argument is iterated and
        each item is treated as one line, so pass an open file handle (as in
        the example above) or a list of lines.
    debug : bool
        Provides debug information. Defaults to False.
        Not read anywhere in this function body.

    Returns
    -------
    :py:class:`~ConfigSpace.configuration_space.ConfigurationSpace`
        The deserialized ConfigurationSpace object

    Raises
    ------
    NotImplementedError
        If a condition line or a parameter line cannot be parsed, or a
        forbidden clause uses an operator other than ``=``.
    """
    configuration_space = ConfigurationSpace()
    conditions = []
    forbidden = []

    # some statistics (only incremented, never read in this function)
    ct = 0
    cont_ct = 0
    cat_ct = 0
    line_ct = 0

    for line in pcs_string:
        line_ct += 1

        if "#" in line:
            # It contains a comment: keep only the text before the first '#'
            pos = line.find("#")
            line = line[:pos]

        # Remove quotes and whitespaces at beginning and end
        line = line.replace('"', "").replace("'", "")
        line = line.strip()

        if "|" in line:
            # It's a condition; collected now, resolved after all
            # hyperparameters have been added
            try:
                c = pp_condition.parseString(line)
                conditions.append(c)
            except pyparsing.ParseException:
                raise NotImplementedError("Could not parse condition: %s" % line)
            continue
        # Lines with neither '}' nor ']' are skipped (this also covers
        # empty lines)
        if "}" not in line and "]" not in line:
            continue
        # A line fully wrapped in braces is a forbidden clause; handled later
        if line.startswith("{") and line.endswith("}"):
            forbidden.append(line)
            continue
        if len(line.strip()) == 0:
            continue

        ct += 1
        param = None

        create = {"int": UniformIntegerHyperparameter,
                  "float": UniformFloatHyperparameter,
                  "categorical": CategoricalHyperparameter}

        # First attempt: continuous parameter. A parse failure is not an
        # error here, the categorical grammar is tried next.
        try:
            param_list = pp_cont_param.parseString(line)
            # Tokens from index 9 on hold the optional flag string
            il = param_list[9:]
            if len(il) > 0:
                il = il[0]
            param_list = param_list[:9]
            name = param_list[0]
            lower = float(param_list[2])
            upper = float(param_list[4])
            # 'i' in the flags selects an integer parameter, 'l' log scale
            paramtype = "int" if "i" in il else "float"
            log = True if "l" in il else False
            default_value = float(param_list[7])
            param = create[paramtype](name=name, lower=lower, upper=upper,
                                      q=None, log=log, default_value=default_value)
            cont_ct += 1
        except pyparsing.ParseException:
            pass

        # Second attempt: categorical parameter. This runs even when the
        # continuous parse succeeded and overrides ``param`` if it matches.
        try:
            param_list = pp_cat_param.parseString(line)
            name = param_list[0]
            # Every second token between index 2 and the trailing default
            # section is a choice
            choices = [c for c in param_list[2:-4:2]]
            default_value = param_list[-2]
            param = create["categorical"](name=name, choices=choices,
                                          default_value=default_value)
            cat_ct += 1
        except pyparsing.ParseException:
            pass

        if param is None:
            raise NotImplementedError("Could not parse: %s" % line)

        configuration_space.add_hyperparameter(param)

    for clause in forbidden:
        # TODO test this properly!
        # TODO Add a try/catch here!
        # noinspection PyUnusedLocal
        param_list = pp_forbidden_clause.parseString(clause)
        tmp_list = []
        clause_list = []
        # Tokens are gathered three at a time (name, operator, value); the
        # token that follows a complete triple triggers its conversion and
        # is itself not stored.
        for value in param_list[1:]:
            if len(tmp_list) < 3:
                tmp_list.append(value)
            else:
                # So far, only equals is supported by SMAC
                if tmp_list[1] == '=':
                    # TODO maybe add a check if the hyperparameter is
                    # actually in the configuration space
                    clause_list.append(ForbiddenEqualsClause(
                        configuration_space.get_hyperparameter(tmp_list[0]),
                        tmp_list[2]))
                else:
                    raise NotImplementedError()
                tmp_list = []
        configuration_space.add_forbidden_clause(ForbiddenAndConjunction(
            *clause_list))

    # Now handle conditions
    # If there are two conditions for one child, these two conditions are an
    # AND-conjunction of conditions, thus we have to connect them
    conditions_per_child = OrderedDict()
    for condition in conditions:
        child_name = condition[0]
        if child_name not in conditions_per_child:
            conditions_per_child[child_name] = list()
        conditions_per_child[child_name].append(condition)

    for child_name in conditions_per_child:
        condition_objects = []
        for condition in conditions_per_child[child_name]:
            child = configuration_space.get_hyperparameter(child_name)
            parent_name = condition[2]
            parent = configuration_space.get_hyperparameter(parent_name)
            # Every second token from index 5 up to (excluding) the last
            # one is an allowed parent value
            restrictions = condition[5:-1:2]

            # TODO: cast the type of the restriction!
            if len(restrictions) == 1:
                condition = EqualsCondition(child, parent, restrictions[0])
            else:
                condition = InCondition(child, parent, values=restrictions)
            condition_objects.append(condition)

        # Now we have all condition objects for this child, so we can build a
        # giant AND-conjunction of them (if number of conditions >= 2)!

        if len(condition_objects) > 1:
            and_conjunction = AndConjunction(*condition_objects)
            configuration_space.add_condition(and_conjunction)
        else:
            configuration_space.add_condition(condition_objects[0])

    return configuration_space
def get_hyperparameter_search_space(
    self,
    dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None,
    default: Optional[str] = None,
    include: Optional[List[str]] = None,
    exclude: Optional[List[str]] = None,
) -> ConfigurationSpace:
    """Build the configuration space over the available trainer components.

    Args:
        dataset_properties (Optional[Dict[str, BaseDatasetPropertiesType]]):
            Describes the dataset to work on. Merged on top of
            ``self.dataset_properties``.
        default (Optional[str]):
            Default trainer to use. When omitted, ``'StandardTrainer'`` is
            used if it is available.
        include (Optional[List[str]]):
            Forwarded to ``get_available_components`` as ``include``.
        exclude (Optional[List[str]]):
            Forwarded to ``get_available_components`` as ``exclude``.

    Returns:
        ConfigurationSpace: a ``__choice__`` hyperparameter plus the nested
            space of every selectable trainer, each conditioned on that
            choice. The result is also stored on ``self.configuration_space_``
            and the merged properties on ``self.dataset_properties_``.

    Raises:
        ValueError: if no trainer is available, or a ``__choice__`` search
            space update names a trainer that is not available.
    """
    cs = ConfigurationSpace()

    given_properties = {} if dataset_properties is None else dataset_properties
    dataset_properties = {**self.dataset_properties, **given_properties}

    # Compile a list of legal trainers for this problem
    available_trainers = self.get_available_components(
        dataset_properties=dataset_properties,
        include=include,
        exclude=exclude)

    if len(available_trainers) == 0:
        raise ValueError("No trainer found")

    if default is None:
        # First preferred default that is actually available, else None.
        preferred = ['StandardTrainer']
        default = next(
            (candidate for candidate in preferred if candidate in available_trainers),
            None)

    choice_updates = self._get_search_space_updates()
    if '__choice__' not in choice_updates.keys():
        trainer = CategoricalHyperparameter(
            '__choice__',
            list(available_trainers.keys()),
            default_value=default)
    else:
        choice_hyperparameter = choice_updates['__choice__']
        requested = set(choice_hyperparameter.value_range)
        if not requested.issubset(available_trainers):
            raise ValueError("Expected given update for {} to have "
                             "choices in {} got {}".format(
                                 self.__class__.__name__,
                                 available_trainers,
                                 choice_hyperparameter.value_range))
        trainer = CategoricalHyperparameter(
            '__choice__',
            choice_hyperparameter.value_range,
            default_value=choice_hyperparameter.default_value)
    cs.add_hyperparameter(trainer)

    # Attach every selectable trainer's own space below the choice.
    for name in trainer.choices:
        component_updates = self._get_search_space_updates(prefix=name)
        component = available_trainers[name]
        config_space = component.get_hyperparameter_search_space(
            dataset_properties,  # type:ignore
            **component_updates)
        cs.add_configuration_space(
            name,
            config_space,
            parent_hyperparameter={'parent': trainer, 'value': name})

    self.configuration_space_ = cs
    self.dataset_properties_ = dataset_properties
    return cs
def read(pcs_string, debug=False):
    """
    Read in a :py:class:`~ConfigSpace.configuration_space.ConfigurationSpace`
    definition from a pcs file.

    Example
    -------

    .. testsetup:: pcs_new_test

        from ConfigSpace import ConfigurationSpace
        import ConfigSpace.hyperparameters as CSH
        from ConfigSpace.read_and_write import pcs_new
        cs = ConfigurationSpace()
        cs.add_hyperparameter(CSH.CategoricalHyperparameter('a', choices=[1, 2, 3]))
        with open('configspace.pcs_new', 'w') as f:
             f.write(pcs_new.write(cs))

    .. doctest:: pcs_new_test

        >>> from ConfigSpace.read_and_write import pcs_new
        >>> with open('configspace.pcs_new', 'r') as fh:
        ...     deserialized_conf = pcs_new.read(fh)

    Parameters
    ----------
    pcs_string : iterable of str
        ConfigSpace definition in pcs format. The argument is iterated and
        each item is treated as one line, so pass an open file handle (as in
        the example above) or a list of lines.
    debug : bool
        Provides debug information. Defaults to False.
        Not read anywhere in this function body.

    Returns
    -------
    :py:class:`~ConfigSpace.configuration_space.ConfigurationSpace`
        The deserialized ConfigurationSpace object

    Raises
    ------
    NotImplementedError
        If a condition line or a parameter line cannot be parsed, or a
        forbidden clause uses an operator other than ``=``.
    ValueError
        If a forbidden value lies outside the bounds of a numerical
        hyperparameter, is not one of the values of a categorical/ordinal
        hyperparameter, or the hyperparameter kind is unsupported.
    """
    configuration_space = ConfigurationSpace()
    conditions = []
    forbidden = []

    create = {
        "int": UniformIntegerHyperparameter,
        "float": UniformFloatHyperparameter,
        "categorical": CategoricalHyperparameter,
        "ordinal": OrdinalHyperparameter,
    }

    for line in pcs_string:
        if "#" in line:
            # It contains a comment: keep only the text before the first '#'
            pos = line.find("#")
            line = line[:pos]

        # Remove quotes and whitespaces at beginning and end
        line = line.replace('"', "").replace("'", "")
        line = line.strip()

        if "|" in line:
            # It's a condition; collected now, resolved after all
            # hyperparameters have been added
            try:
                c = pp_condition.parseString(line)
                conditions.append(c)
            except pyparsing.ParseException as exc:
                raise NotImplementedError(
                    "Could not parse condition: %s" % line) from exc
            continue
        # Lines with neither '}' nor ']' are skipped (covers empty lines)
        if "}" not in line and "]" not in line:
            continue
        # A line fully wrapped in braces is a forbidden clause; handled later
        if line.startswith("{") and line.endswith("}"):
            forbidden.append(line)
            continue
        if len(line.strip()) == 0:
            continue

        param = None

        # First attempt: integer/real parameter. A parse failure is not an
        # error here, the categorical/ordinal grammars are tried next.
        try:
            param_list = pp_cont_param.parseString(line)
            name = param_list[0]
            if param_list[1] == 'integer':
                paramtype = 'int'
            elif param_list[1] == 'real':
                paramtype = 'float'
            else:
                paramtype = None

            if paramtype in ['int', 'float']:
                # Tokens from index 10 on hold the optional 'log' marker
                log = param_list[10:]
                param_list = param_list[:10]
                if len(log) > 0:
                    log = log[0]
                lower = float(param_list[3])
                upper = float(param_list[5])
                log_on = "log" in log
                default_value = float(param_list[8])
                param = create[paramtype](name=name, lower=lower, upper=upper,
                                          q=None, log=log_on,
                                          default_value=default_value)
        except pyparsing.ParseException:
            pass

        # Second attempt: categorical or ordinal parameter; overrides
        # ``param`` when it matches.
        try:
            if "categorical" in line:
                param_list = pp_cat_param.parseString(line)
                name = param_list[0]
                choices = list(param_list[3:-4:2])
                default_value = param_list[-2]
                param = create["categorical"](
                    name=name,
                    choices=choices,
                    default_value=default_value,
                )
            elif "ordinal" in line:
                param_list = pp_ord_param.parseString(line)
                name = param_list[0]
                sequence = list(param_list[3:-4:2])
                default_value = param_list[-2]
                param = create["ordinal"](
                    name=name,
                    sequence=sequence,
                    default_value=default_value,
                )
        except pyparsing.ParseException:
            pass

        if param is None:
            raise NotImplementedError("Could not parse: %s" % line)

        configuration_space.add_hyperparameter(param)

    for clause in forbidden:
        param_list = pp_forbidden_clause.parseString(clause)
        tmp_list = []
        clause_list = []
        # Tokens are gathered three at a time (name, operator, value); the
        # token that follows a complete triple triggers its conversion and
        # is itself not stored.
        for value in param_list[1:]:
            if len(tmp_list) < 3:
                tmp_list.append(value)
            else:
                # So far, only equals is supported by SMAC
                if tmp_list[1] == '=':
                    hp = configuration_space.get_hyperparameter(tmp_list[0])
                    if isinstance(hp, NumericalHyperparameter):
                        if isinstance(hp, IntegerHyperparameter):
                            forbidden_value = int(tmp_list[2])
                        elif isinstance(hp, FloatHyperparameter):
                            forbidden_value = float(tmp_list[2])
                        else:
                            raise NotImplementedError
                        if forbidden_value < hp.lower or forbidden_value > hp.upper:
                            raise ValueError(
                                f'forbidden_value is set out of the bound, it needs to'
                                f' be set between [{hp.lower}, {hp.upper}]'
                                f' but its value is {forbidden_value}')
                    elif isinstance(hp, (CategoricalHyperparameter,
                                         OrdinalHyperparameter)):
                        hp_values = hp.choices if isinstance(hp, CategoricalHyperparameter)\
                            else hp.sequence
                        if tmp_list[2] in hp_values:
                            forbidden_value = tmp_list[2]
                        else:
                            # Report the offending token itself: on this
                            # path ``forbidden_value`` is unbound or still
                            # holds the value of a previous clause.
                            raise ValueError(
                                f'forbidden_value is set out of the allowed value '
                                f'sets, it needs to be one member from {hp_values} '
                                f'but its value is {tmp_list[2]}')
                    else:
                        raise ValueError('Unsupported Hyperparamter sorts')

                    clause_list.append(ForbiddenEqualsClause(hp, forbidden_value))
                else:
                    raise NotImplementedError()
                tmp_list = []
        configuration_space.add_forbidden_clause(
            ForbiddenAndConjunction(*clause_list))

    # Group the parsed conditions by child, keeping first-seen order
    conditions_per_child = OrderedDict()
    for condition in conditions:
        child_name = condition[0]
        if child_name not in conditions_per_child:
            conditions_per_child[child_name] = list()
        conditions_per_child[child_name].append(condition)

    for child_name in conditions_per_child:
        for condition in conditions_per_child[child_name]:
            # Drop the child name and the separator token, then work on the
            # re-joined condition text
            condition = condition[2:]
            condition = ' '.join(condition)
            if '||' in str(condition):
                ors = []
                # 1st case we have a mixture of || and &&
                if '&&' in str(condition):
                    ors_combis = []
                    for cond_parts in str(condition).split('||'):
                        condition = str(cond_parts).split('&&')
                        # if length is 1 it must be or
                        if len(condition) == 1:
                            element_list = condition[0].split()
                            ors_combis.append(
                                condition_specification(
                                    child_name,
                                    element_list,
                                    configuration_space,
                                ))
                        else:
                            # now taking care of ands
                            ands = []
                            for and_part in condition:
                                # Tokens of this single and-part only, as in
                                # the other branches (no repetition per
                                # sibling part)
                                element_list = and_part.split()
                                ands.append(
                                    condition_specification(
                                        child_name,
                                        element_list,
                                        configuration_space,
                                    ))
                            ors_combis.append(AndConjunction(*ands))
                    mixed_conjunction = OrConjunction(*ors_combis)
                    configuration_space.add_condition(mixed_conjunction)
                else:
                    # 2nd case: we only have ors
                    for cond_parts in str(condition).split('||'):
                        element_list = cond_parts.split()
                        ors.append(
                            condition_specification(
                                child_name,
                                element_list,
                                configuration_space,
                            ))
                    or_conjunction = OrConjunction(*ors)
                    configuration_space.add_condition(or_conjunction)
            else:
                # 3rd case: we only have ands
                if '&&' in str(condition):
                    ands = []
                    for cond_parts in str(condition).split('&&'):
                        element_list = cond_parts.split()
                        ands.append(
                            condition_specification(
                                child_name,
                                element_list,
                                configuration_space,
                            ))
                    and_conjunction = AndConjunction(*ands)
                    configuration_space.add_condition(and_conjunction)
                else:
                    # 4th case: we have a normal condition
                    element_list = condition.split()
                    normal_condition = condition_specification(
                        child_name,
                        element_list,
                        configuration_space,
                    )
                    configuration_space.add_condition(normal_condition)

    return configuration_space
def add_params(cs: ConfigurationSpace):
    '''
    adds the RandomForest parameters to ConfigurationSpace

    Looks up the ``classifier`` hyperparameter of ``cs``. When
    ``"RandomForest"`` is one of its choices, the ``rf:*`` hyperparameters
    are added to ``cs`` in place, each made conditional on
    ``classifier == "RandomForest"``, and the resulting space is printed.

    This is best-effort: any ``Exception`` raised on the way (for instance
    by the ``classifier`` lookup) ends the function silently, possibly
    leaving ``cs`` partially extended. Non-``Exception`` signals such as
    ``KeyboardInterrupt`` are deliberately not swallowed.

    Returns ``None`` in every case.
    '''
    try:
        classifier = cs.get_hyperparameter("classifier")
        if "RandomForest" not in classifier.choices:
            return

        rf_hyperparameters = []

        def register(hyperparameter):
            # Add immediately so that parameters created before a failure
            # are still present, then remember it for the condition pass.
            cs.add_hyperparameter(hyperparameter)
            rf_hyperparameters.append(hyperparameter)

        register(UniformIntegerHyperparameter(
            name="rf:n_estimators", lower=10, upper=100, default_value=10, log=True))
        register(CategoricalHyperparameter(
            name="rf:criterion", choices=["gini", "entropy"], default_value="gini"))
        register(CategoricalHyperparameter(
            name="rf:max_features", choices=["sqrt", "log2", "None"],
            default_value="sqrt"))
        register(UniformIntegerHyperparameter(
            name="rf:max_depth", lower=10, upper=2**31, default_value=2**31, log=True))
        register(UniformIntegerHyperparameter(
            name="rf:min_samples_split", lower=2, upper=100, default_value=2, log=True))
        register(UniformIntegerHyperparameter(
            name="rf:min_samples_leaf", lower=2, upper=100, default_value=10, log=True))
        register(CategoricalHyperparameter(
            name="rf:bootstrap", choices=[True, False], default_value=True))

        # Every rf:* parameter is only active for the RandomForest choice.
        for hyperparameter in rf_hyperparameters:
            cs.add_condition(InCondition(child=hyperparameter,
                                         parent=classifier,
                                         values=["RandomForest"]))
        print(cs)
    except Exception:
        return
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
    """Build the random-forest search space for the requested optimizer.

    Parameters
    ----------
    dataset_properties : optional
        Accepted for interface compatibility; not read by this function.
    optimizer : str
        ``'smac'`` yields a ``ConfigurationSpace``; ``'tpe'`` yields a dict
        of hyperopt expressions keyed by parameter name.

    Returns
    -------
    ConfigurationSpace or dict or None
        ``None`` is returned for any other ``optimizer`` value.
    """
    if optimizer == 'smac':
        cs = ConfigurationSpace()
        n_estimators = Constant("n_estimators", 100)
        criterion = CategoricalHyperparameter(
            "criterion", ["gini", "entropy"], default_value="gini")

        # The maximum number of features used in the forest is calculated as m^max_features, where
        # m is the total number of features, and max_features is the hyperparameter specified below.
        # The default is 0.5, which yields sqrt(m) features as max_features in the estimator. This
        # corresponds with Geurts' heuristic.
        max_features = UniformFloatHyperparameter(
            "max_features", 0., 1., default_value=0.5)

        max_depth = UnParametrizedHyperparameter("max_depth", "None")
        min_samples_split = UniformIntegerHyperparameter(
            "min_samples_split", 2, 20, default_value=2)
        min_samples_leaf = UniformIntegerHyperparameter(
            "min_samples_leaf", 1, 20, default_value=1)
        min_weight_fraction_leaf = UnParametrizedHyperparameter(
            "min_weight_fraction_leaf", 0.)
        max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
        min_impurity_decrease = UnParametrizedHyperparameter(
            'min_impurity_decrease', 0.0)
        bootstrap = CategoricalHyperparameter(
            "bootstrap", ["True", "False"], default_value="True")
        cs.add_hyperparameters([
            n_estimators, criterion, max_features, max_depth,
            min_samples_split, min_samples_leaf, min_weight_fraction_leaf,
            max_leaf_nodes, bootstrap, min_impurity_decrease,
        ])
        return cs
    elif optimizer == 'tpe':
        from hyperopt import hp

        # Fixed values are expressed as single-option hp.choice nodes;
        # hp.randint(label, k) draws from [0, k), shifted onto the same
        # integer ranges as the 'smac' branch.
        space = {
            'n_estimators': hp.choice('rf_n_estimators', [100]),
            'criterion': hp.choice('rf_criterion', ["gini", "entropy"]),
            'max_features': hp.uniform('rf_max_features', 0, 1),
            'max_depth': hp.choice('rf_max_depth', [None]),
            'min_samples_split': hp.randint('rf_min_samples_split', 19) + 2,
            'min_samples_leaf': hp.randint('rf_min_samples_leaf', 20) + 1,
            'min_weight_fraction_leaf': hp.choice('rf_min_weight_fraction_leaf', [0]),
            'max_leaf_nodes': hp.choice('rf_max_leaf_nodes', [None]),
            'min_impurity_decrease': hp.choice('rf_min_impurity_decrease', [0]),
            'bootstrap': hp.choice('rf_bootstrap', ["True", "False"]),
        }
        return space
from ConfigSpace.configuration_space import ConfigurationSpace from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \ UniformIntegerHyperparameter, CategoricalHyperparameter, \ UnParametrizedHyperparameter, Constant from ConfigSpace.conditions import EqualsCondition, InCondition from automl.utl import json_utils cs = ConfigurationSpace() loss = CategoricalHyperparameter( "loss", ["hinge", "log", "modified_huber", "squared_hinge", "perceptron"], default_value="log") penalty = CategoricalHyperparameter("penalty", ["l1", "l2", "elasticnet"], default_value="l2") alpha = UniformFloatHyperparameter("alpha", 1e-7, 1e-1, log=True, default_value=0.0001) l1_ratio = UniformFloatHyperparameter("l1_ratio", 1e-9, 1, log=True, default_value=0.15) fit_intercept = Constant("fit_intercept", "True") tol = UniformFloatHyperparameter("tol", 1e-5, 1e-1, log=True, default_value=1e-4) epsilon = UniformFloatHyperparameter("epsilon",
def get_hyperparameter_search_space(dataset_properties=None):
    """Return a space holding only ``minimum_fraction``.

    The parameter is a log-scaled float in [0.0001, 0.5] with default 0.01.
    ``dataset_properties`` is accepted for interface compatibility and is
    not read.
    """
    space = ConfigurationSpace()
    space.add_hyperparameter(
        UniformFloatHyperparameter(
            "minimum_fraction",
            lower=.0001,
            upper=0.5,
            default_value=0.01,
            log=True))
    return space
def _convert_dict_to_config(config_list: List[str], cs: ConfigurationSpace) -> Configuration:
    """Since we save a configurations in a dictionary str->str we have to
    try to figure out the type (int, float, str) of each parameter value

    Parameters
    ----------
    config_list: List[str]
        Configuration as a list of "str='str'". Each entry is split at its
        first "=", so a value may itself contain "=".
    cs: ConfigurationSpace
        Configuration Space to translate dict object into Confiuration object

    Returns
    -------
    Configuration
        Configuration built from the typed values, with ``origin`` set to
        "External Trajectory".
    """
    config_dict = {}
    v = ''  # type: Union[str, float, int, bool]
    for param in config_list:
        # Split at the first '=' only; later '=' characters belong to the value.
        k, v = param.split("=", 1)
        v = v.strip("'")
        hp = cs.get_hyperparameter(k)
        if isinstance(hp, FloatHyperparameter):
            v = float(v)
        elif isinstance(hp, IntegerHyperparameter):
            v = int(v)
        elif isinstance(hp, (CategoricalHyperparameter, Constant)):
            # Checking for the correct type requires jumping some hoops
            # First, we gather possible interpretations of our string
            interpretations = [v]  # type: List[Union[str, bool, int, float]]
            if v in ["True", "False"]:
                # Special Case for booleans (assuming we support them)
                # This is important to avoid false positive warnings triggered by 1 == True or "False" == True
                interpretations.append(v == 'True')
            else:
                for t in [int, float]:
                    try:
                        interpretations.append(t(v))
                    except ValueError:
                        continue

            # Second, check if it's in the choices / the correct type.
            legal = {interpretation for interpretation in interpretations
                     if hp.is_legal(interpretation)}

            # Third, issue warnings if the interpretation is ambigious
            if len(legal) != 1:
                logging.getLogger("smac.trajlogger").warning(
                    "Ambigous or no interpretation of value {} for hp {} found ({} possible interpretations). "
                    "Passing string, but this will likely result in an error"
                    .format(v, hp.name, len(legal)))
            else:
                v = legal.pop()

        config_dict[k] = v

    config = Configuration(configuration_space=cs, values=config_dict)
    config.origin = "External Trajectory"
    return config
# Script: builds a two-parameter configuration space (alpha, fit_prior) and
# hands it to json_utils.write_cs_to_json_file under the name "BernoulliNB".
from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \
    UniformIntegerHyperparameter, CategoricalHyperparameter, \
    UnParametrizedHyperparameter, Constant

from automl.utl import json_utils

cs = ConfigurationSpace()

# the smoothing parameter is a non-negative float
# It is limited to the range [1e-2, 100] and put on a logarithmic scale. (SF)
# Please adjust that, if you know a proper range, this is just a guess.
alpha = UniformFloatHyperparameter(name="alpha", lower=1e-2, upper=100, default_value=1, log=True)

# The two options are stored as the strings "True"/"False", not as booleans.
fit_prior = CategoricalHyperparameter(name="fit_prior", choices=["True", "False"], default_value="True")

cs.add_hyperparameters([alpha, fit_prior])

json_utils.write_cs_to_json_file(cs, "BernoulliNB")
def setups_to_configspace(setups, default_params, keyfield='parameter_name',
                          logscale_parameters=None, ignore_parameters=None,
                          ignore_constants=True):
    """Derive a configuration space from the values observed in ``setups``.

    ``setups`` is the result of an ``openml.setups.list_setups`` call. Note
    that this config space is not equal to the one obtained from
    auto-sklearn, but it is useful for creating the pcs file.

    Every parameter becomes an integer range if all of its observed values
    can be cast to int, a float range if they can all be cast to float, and
    a categorical (values passed through ``flow_to_sklearn``) otherwise.

    Parameters
    ----------
    setups : dict
        Maps setup id to a setup object exposing ``flow_id`` and
        ``parameters`` (a dict of objects with a ``value`` attribute and the
        attribute named by ``keyfield``).
    default_params : dict
        Maps parameter name to its default value. Must cover every
        parameter that occurs in ``setups``.
    keyfield : str
        Attribute of a parameter object that is used as the parameter name.
    logscale_parameters : collection of str, optional
        Names of numeric parameters to put on a log scale. Names that never
        occur in ``setups`` are ignored.
    ignore_parameters : collection of str, optional
        Names of parameters to leave out of the configuration space.
    ignore_constants : bool
        If True, parameters with at most one observed value are reported in
        the returned ``constants`` but not added to the space.

    Returns
    -------
    cs : ConfigurationSpace
    constants : set of str
        Names of the parameters with at most one observed value.

    Raises
    ------
    ValueError
        If the setups belong to different flows, or if a parameter has no
        entry in ``default_params``.
    """
    parameter_values = {}
    flow_id = None
    for setup_id in setups:
        current = setups[setup_id]
        if flow_id is None:
            flow_id = current.flow_id
        elif current.flow_id != flow_id:
            raise ValueError(
                'flow ids are expected to be equal. Expected %d, saw %s'
                % (flow_id, current.flow_id))

        for parameter in current.parameters.values():
            name = getattr(parameter, keyfield)
            parameter_values.setdefault(name, set()).add(parameter.value)

    uncovered = set(parameter_values.keys()) - set(default_params.keys())
    if len(uncovered) > 0:
        # The placeholder is required: without it the "%" itself fails with
        # a TypeError and the intended ValueError is never raised.
        raise ValueError(
            'Mismatch between keys default_params and parameter_values. '
            'Missing: %s' % str(uncovered))

    def is_castable_to(value, cast):
        # TypeError covers values such as None, which int()/float() reject
        # with a TypeError rather than a ValueError.
        try:
            cast(value)
            return True
        except (TypeError, ValueError):
            return False

    cs = ConfigurationSpace()
    if logscale_parameters is None:
        logscale_parameters = set()

    constants = set()
    for name in parameter_values.keys():
        if ignore_parameters is not None and name in ignore_parameters:
            continue

        all_values = parameter_values[name]
        if len(all_values) <= 1:
            constants.add(name)
            if ignore_constants:
                continue

        if all(is_castable_to(item, int) for item in all_values):
            all_values = [int(item) for item in all_values]
            lower = min(all_values)
            upper = max(all_values)
            default = default_params[name]
            if is_castable_to(default, int):
                # Same type as the bounds it is compared against.
                default = int(default)
            else:
                sys.stderr.write(
                    'Illegal default for parameter %s (expected int): %s\n'
                    % (name, str(default)))
                # Fall back to the midpoint, which always lies in range.
                default = (lower + upper) // 2

            hyper = UniformIntegerHyperparameter(
                name=name, lower=lower, upper=upper, default=default,
                log=name in logscale_parameters)
            cs.add_hyperparameter(hyper)
        elif all(is_castable_to(item, float) for item in all_values):
            all_values = [float(item) for item in all_values]
            lower = min(all_values)
            upper = max(all_values)
            default = default_params[name]
            if is_castable_to(default, float):
                default = float(default)
            else:
                sys.stderr.write(
                    'Illegal default for parameter %s (expected float): %s\n'
                    % (name, str(default)))
                default = (lower + upper) / 2

            hyper = UniformFloatHyperparameter(
                name=name, lower=lower, upper=upper, default=default,
                log=name in logscale_parameters)
            cs.add_hyperparameter(hyper)
        else:
            values = [flow_to_sklearn(item) for item in all_values]
            hyper = CategoricalHyperparameter(
                name=name, choices=values, default=default_params[name])
            cs.add_hyperparameter(hyper)
    return cs, constants
from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import (
    UniformIntegerHyperparameter,
    CategoricalHyperparameter,
)
from automl.utl import json_utils

# Search space for "KNeighborsClassifier", written out through json_utils.
cs = ConfigurationSpace()

# Number of neighbours, searched on a logarithmic scale.
n_neighbors = UniformIntegerHyperparameter(
    name="n_neighbors",
    lower=1,
    upper=100,
    log=True,
    default_value=5,
)
weights = CategoricalHyperparameter(
    name="weights",
    choices=["uniform", "distance"],
    default_value="uniform",
)
# p is restricted to the two integer choices 1 and 2.
p = CategoricalHyperparameter(
    name="p",
    choices=[1, 2],
    default_value=2,
)

cs.add_hyperparameter(n_neighbors)
cs.add_hyperparameter(weights)
cs.add_hyperparameter(p)

json_utils.write_cs_to_json_file(cs, "KNeighborsClassifier")
def get_branin_config_space() -> ConfigurationSpace:
    """Return a space with two uniform floats: ``x`` in [-5, 10], ``y`` in [0, 15]."""
    branin_space = ConfigurationSpace()
    for hp_name, lower, upper in (('x', -5, 10), ('y', 0, 15)):
        branin_space.add_hyperparameter(
            UniformFloatHyperparameter(hp_name, lower, upper))
    return branin_space
from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import (
    UniformFloatHyperparameter,
    UniformIntegerHyperparameter,
    CategoricalHyperparameter,
    UnParametrizedHyperparameter,
    Constant,
)
from automl.utl import json_utils

# Search space for "AdaBoostRegressor", written out through json_utils.
cs = ConfigurationSpace()

# NOTE: base_estimator is not registered; its Constant is commented out.
# base_estimator = Constant(name="base_estimator", value="None")
n_estimators = UniformIntegerHyperparameter(
    name="n_estimators",
    lower=50,
    upper=500,
    default_value=50,
    log=False,
)
# The learning rate is the only hyperparameter searched on a log scale.
learning_rate = UniformFloatHyperparameter(
    name="learning_rate",
    lower=0.01,
    upper=2,
    default_value=0.1,
    log=True,
)
loss = CategoricalHyperparameter(
    name="loss",
    choices=["linear", "square", "exponential"],
    default_value="linear",
)
max_depth = UniformIntegerHyperparameter(
    name="max_depth",
    lower=1,
    upper=10,
    default_value=1,
    log=False,
)

cs.add_hyperparameter(n_estimators)
cs.add_hyperparameter(learning_rate)
cs.add_hyperparameter(loss)
cs.add_hyperparameter(max_depth)

json_utils.write_cs_to_json_file(cs, "AdaBoostRegressor")
def get_hyperparameter_search_space(
    dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None,
    use_augmenter: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="use_augmenter",
                                                                         value_range=(True, False),
                                                                         default_value=True,
                                                                         ),
    scale_offset: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="scale_offset",
                                                                        value_range=(0, 0.4),
                                                                        default_value=0.2,
                                                                        ),
    translate_percent_offset: HyperparameterSearchSpace = HyperparameterSearchSpace(
        hyperparameter="translate_percent_offset",
        value_range=(0, 0.4),
        default_value=0.2),
    shear: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="shear",
                                                                 value_range=(0, 45),
                                                                 default_value=30,
                                                                 ),
    rotate: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="rotate",
                                                                  value_range=(0, 360),
                                                                  default_value=45,
                                                                  ),
) -> ConfigurationSpace:
    """Build the configuration space of the augmenter.

    Every search-space argument is turned into a hyperparameter with
    ``get_hyperparameter``: ``use_augmenter`` as categorical,
    ``scale_offset`` and ``translate_percent_offset`` as uniform floats,
    ``shear`` and ``rotate`` as uniform integers. ``dataset_properties`` is
    not used by this function.

    Returns
    -------
    ConfigurationSpace
        Space in which the four numeric hyperparameters are only active
        when ``use_augmenter`` equals True.
    """
    cs = ConfigurationSpace()

    toggle_hp = get_hyperparameter(use_augmenter, CategoricalHyperparameter)
    scale_hp = get_hyperparameter(scale_offset, UniformFloatHyperparameter)
    translate_hp = get_hyperparameter(translate_percent_offset, UniformFloatHyperparameter)
    shear_hp = get_hyperparameter(shear, UniformIntegerHyperparameter)
    rotate_hp = get_hyperparameter(rotate, UniformIntegerHyperparameter)

    cs.add_hyperparameters([toggle_hp, scale_hp, translate_hp])
    cs.add_hyperparameters([shear_hp, rotate_hp])

    # The numeric hyperparameters only matter when the augmenter is used.
    for dependent_hp in (scale_hp, translate_hp, shear_hp, rotate_hp):
        cs.add_condition(CS.EqualsCondition(dependent_hp, toggle_hp, True))

    return cs
def _get_hyperparameter_search_space(self, include=None, exclude=None,
                                     dataset_properties=None):
    """Create the hyperparameter configuration space of the classifier pipeline.

    The base space comes from ``self._get_base_search_space``. Forbidden
    clauses are then added for classifier / preprocessor combinations that
    are not wanted. Whenever adding a clause raises ``ValueError``, the
    default of 'classifier:__choice__' is replaced by another available
    classifier and the clause is tried again.

    Parameters
    ----------
    include : dict (optional, default=None)
        Forwarded to ``self._get_base_search_space``.
    exclude : dict (optional, default=None)
        Forwarded to ``self._get_base_search_space``.
    dataset_properties : dict (optional, default=None)
        Anything that is not a dict is replaced by an empty dict.
        'target_type' is set to 'classification' and 'sparse' defaults to
        False; a dict passed in is modified in place.

    Returns
    -------
    cs : ConfigurationSpace
        The space returned by ``self._get_base_search_space`` with the
        forbidden clauses added. It is also stored in
        ``self.configuration_space_``.

    Raises
    ------
    ValueError
        If no replacement default classifier is left.
    """
    cs = ConfigurationSpace()

    if not isinstance(dataset_properties, dict):
        dataset_properties = dict()
    if dataset_properties.get('target_type') != 'classification':
        dataset_properties['target_type'] = 'classification'
    # Without further information the dataset is assumed to be dense.
    dataset_properties.setdefault('sparse', False)

    cs = self._get_base_search_space(
        cs=cs, dataset_properties=dataset_properties,
        exclude=exclude, include=include, pipeline=self.steps)

    classifier_choice = 'classifier:__choice__'
    preprocessor_choice = 'preprocessor:__choice__'

    classifiers = cs.get_hyperparameter(classifier_choice).choices
    preprocessors = cs.get_hyperparameter(preprocessor_choice).choices
    available_classifiers = self._final_estimator.get_available_components(
        dataset_properties)

    # Every available classifier except the current default can take over
    # as default when a forbidden clause rules the current one out.
    fallback_defaults = list(available_classifiers.keys())
    current_default = cs.get_hyperparameter(classifier_choice).default_value
    fallback_defaults.pop(fallback_defaults.index(current_default))

    def forbid_pair(first, second, swallow_key_error):
        # Forbid the conjunction of two (hyperparameter name, value) pairs,
        # swapping the classifier default for as long as the clause is
        # rejected with a ValueError.
        while True:
            try:
                clauses = [
                    ForbiddenEqualsClause(cs.get_hyperparameter(hp_name), value)
                    for hp_name, value in (first, second)
                ]
                cs.add_forbidden_clause(ForbiddenAndConjunction(*clauses))
                return
            except KeyError:
                if swallow_key_error:
                    return
                raise
            except ValueError:
                # Change the default and try again.
                try:
                    replacement = fallback_defaults.pop()
                except IndexError:
                    raise ValueError(
                        "Cannot find a legal default configuration.")
                cs.get_hyperparameter(
                    classifier_choice).default_value = replacement

    # A classifier which can handle sparse data after the densifier is
    # forbidden for memory issues.
    for key in classifiers:
        takes_sparse = SPARSE in available_classifiers[key].get_properties()['input']
        if takes_sparse and 'densifier' in preprocessors:
            forbid_pair((classifier_choice, key),
                        (preprocessor_choice, 'densifier'),
                        swallow_key_error=False)

    # Combinations of non-linear models with feature learning, which would
    # take too long.
    classifiers_ = ["adaboost", "decision_tree", "extra_trees",
                    "gradient_boosting", "k_nearest_neighbors",
                    "libsvm_svc", "random_forest", "gaussian_nb",
                    "decision_tree", "xgradient_boosting"]
    feature_learning = ["kitchen_sinks", "kernel_pca", "nystroem_sampler"]
    for c, f in product(classifiers_, feature_learning):
        if c in classifiers and f in preprocessors:
            forbid_pair((classifier_choice, c),
                        (preprocessor_choice, f),
                        swallow_key_error=True)

    # Won't work: Multinomial NB etc. must not be used with feature
    # learning, pca etc.
    classifiers_ = ["multinomial_nb"]
    preproc_with_negative_X = ["kitchen_sinks", "pca", "truncatedSVD",
                               "fast_ica", "kernel_pca", "nystroem_sampler"]
    for c, f in product(classifiers_, preproc_with_negative_X):
        if c in classifiers and f in preprocessors:
            forbid_pair((preprocessor_choice, f),
                        (classifier_choice, c),
                        swallow_key_error=True)

    self.configuration_space_ = cs
    self.dataset_properties_ = dataset_properties
    return cs
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
    """Return the MLP search space in the format of the chosen optimizer.

    Parameters
    ----------
    dataset_properties : optional
        Not used by this function.
    optimizer : str
        'smac' yields a ``ConfigurationSpace``; 'tpe' yields a dict of
        ``hp`` expressions.

    Returns
    -------
    ConfigurationSpace, dict or None
        None for any other value of ``optimizer``.
    """
    if optimizer == 'smac':
        cs = ConfigurationSpace()

        hidden_size_hp = UniformIntegerHyperparameter(
            "hidden_size", 100, 500, default_value=200)
        activation_hp = CategoricalHyperparameter(
            "activation", ["identity", "logistic", "tanh", "relu"],
            default_value="relu")
        solver_hp = CategoricalHyperparameter(
            "solver", ["sgd", "adam"], default_value="adam")
        alpha_hp = UniformFloatHyperparameter(
            "alpha", 1e-7, 1., log=True, default_value=0.0001)
        learning_rate_hp = CategoricalHyperparameter(
            "learning_rate", ["adaptive", "invscaling", "constant"],
            default_value="constant")
        learning_rate_init_hp = UniformFloatHyperparameter(
            "learning_rate_init", 1e-4, 3e-1, default_value=0.001, log=True)
        tol_hp = UniformFloatHyperparameter(
            "tol", 1e-5, 1e-1, log=True, default_value=1e-4)
        momentum_hp = UniformFloatHyperparameter(
            "momentum", 0.6, 1, q=0.05, default_value=0.9)
        nesterovs_hp = CategoricalHyperparameter(
            "nesterovs_momentum", [True, False], default_value=True)
        beta1_hp = UniformFloatHyperparameter(
            "beta1", 0.6, 1, default_value=0.9)
        power_t_hp = UniformFloatHyperparameter(
            "power_t", 1e-5, 1, log=True, default_value=0.5)

        cs.add_hyperparameters([
            hidden_size_hp, activation_hp, solver_hp, alpha_hp,
            learning_rate_hp, learning_rate_init_hp, tol_hp, momentum_hp,
            nesterovs_hp, beta1_hp, power_t_hp,
        ])

        # Solver-specific hyperparameters are only active for their solver;
        # power_t additionally requires the "invscaling" schedule.
        cs.add_conditions([
            EqualsCondition(learning_rate_hp, solver_hp, "sgd"),
            EqualsCondition(momentum_hp, solver_hp, "sgd"),
            EqualsCondition(nesterovs_hp, solver_hp, "sgd"),
            EqualsCondition(beta1_hp, solver_hp, "adam"),
            EqualsCondition(power_t_hp, learning_rate_hp, "invscaling"),
        ])
        return cs

    if optimizer == 'tpe':
        space = {}
        space['hidden_size'] = hp.randint("mlp_hidden_size", 450) + 50
        space['activation'] = hp.choice(
            'mlp_activation', ["identity", "logistic", "tanh", "relu"])

        # Each solver option carries its own nested sub-space.
        learning_rate_options = [
            ("adaptive", {}),
            ("constant", {}),
            ("invscaling", {'power_t': hp.uniform('mlp_power_t', 1e-5, 1)}),
        ]
        sgd_settings = {
            'learning_rate': hp.choice('mlp_learning_rate',
                                       learning_rate_options),
            'momentum': hp.uniform('mlp_momentum', 0.6, 1),
            'nesterovs_momentum': hp.choice('mlp_nesterovs_momentum',
                                            [True, False]),
        }
        adam_settings = {'beta1': hp.uniform('mlp_beta1', 0.6, 1)}
        space['solver'] = hp.choice(
            'mlp_solver', [("sgd", sgd_settings), ("adam", adam_settings)])

        space['alpha'] = hp.loguniform(
            'mlp_alpha', np.log(1e-7), np.log(1e-1))
        space['learning_rate_init'] = hp.loguniform(
            'mlp_learning_rate_init', np.log(1e-6), np.log(1e-1))
        space['tol'] = hp.loguniform('mlp_tol', np.log(1e-5), np.log(1e-1))
        return space

    return None
def get_hyperparameter_search_space(
    dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None,
    num_blocks: HyperparameterSearchSpace = HyperparameterSearchSpace(
        hyperparameter="num_blocks", value_range=(1, 10), default_value=5),
    num_filters: HyperparameterSearchSpace = HyperparameterSearchSpace(
        hyperparameter="num_filters", value_range=(4, 64), default_value=32),
    kernel_size: HyperparameterSearchSpace = HyperparameterSearchSpace(
        hyperparameter="kernel_size", value_range=(4, 64), default_value=32),
    use_dropout: HyperparameterSearchSpace = HyperparameterSearchSpace(
        hyperparameter="use_dropout", value_range=(True, False), default_value=False),
    dropout: HyperparameterSearchSpace = HyperparameterSearchSpace(
        hyperparameter="dropout", value_range=(0, 0.5), default_value=0.1),
) -> ConfigurationSpace:
    """Build a configuration space with one ``num_filters_{i}`` per block.

    ``num_blocks``, ``kernel_size``, ``use_dropout`` and ``dropout`` are
    added once. The ``num_filters`` search space is replicated as
    ``num_filters_0`` ... ``num_filters_{max-1}``, where ``max`` is the
    upper end of ``num_blocks.value_range``. ``dataset_properties`` is not
    used by this function.

    Returns
    -------
    ConfigurationSpace
        Space in which ``dropout`` is only active when ``use_dropout``
        equals True, and ``num_filters_{i}`` with ``i`` at or above the
        lower end of ``num_blocks.value_range`` is only active when
        ``num_blocks`` is greater than ``i``.
    """
    cs = ConfigurationSpace()

    min_num_blocks, max_num_blocks = num_blocks.value_range
    blocks_hp = get_hyperparameter(num_blocks, UniformIntegerHyperparameter)
    cs.add_hyperparameter(blocks_hp)

    add_hyperparameter(cs, kernel_size, UniformIntegerHyperparameter)

    dropout_switch_hp = get_hyperparameter(use_dropout, CategoricalHyperparameter)
    cs.add_hyperparameter(dropout_switch_hp)

    dropout_rate_hp = get_hyperparameter(dropout, UniformFloatHyperparameter)
    cs.add_hyperparameter(dropout_rate_hp)
    cs.add_condition(CS.EqualsCondition(dropout_rate_hp, dropout_switch_hp, True))

    for block_idx in range(int(max_num_blocks)):
        per_block_space = HyperparameterSearchSpace(
            f"num_filters_{block_idx}",
            value_range=num_filters.value_range,
            default_value=num_filters.default_value,
            log=num_filters.log)
        filters_hp = get_hyperparameter(per_block_space, UniformIntegerHyperparameter)
        cs.add_hyperparameter(filters_hp)

        # Blocks below the minimum count get no condition on num_blocks.
        if block_idx < int(min_num_blocks):
            continue
        cs.add_condition(
            CS.GreaterThanCondition(filters_hp, blocks_hp, block_idx))

    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    """Build the search space over kernel, degree, gamma, coef0 and n_components.

    The "chi2" kernel is left out when ``dataset_properties`` is given and
    either its "is_sparse" entry is True or its "signed" entry is False.

    Returns
    -------
    ConfigurationSpace
        Space in which ``degree`` is only active for the "poly" kernel,
        ``coef0`` for "poly" / "sigmoid", and ``gamma`` for "poly", "rbf",
        "sigmoid" (plus "chi2" when that kernel is allowed).
    """
    chi2_ruled_out = dataset_properties is not None and (
        dataset_properties.get("is_sparse") is True
        or dataset_properties.get("signed") is False)
    allow_chi2 = not chi2_ruled_out

    optional_kernels = ["chi2"] if allow_chi2 else []
    possible_kernels = ['poly', 'rbf', 'sigmoid', 'cosine'] + optional_kernels
    gamma_kernels = ["poly", "rbf", "sigmoid"] + optional_kernels

    kernel_hp = CategoricalHyperparameter('kernel', possible_kernels, 'rbf')
    degree_hp = UniformIntegerHyperparameter('degree', 2, 5, 3)
    gamma_hp = UniformFloatHyperparameter("gamma", 3.0517578125e-05, 8,
                                          log=True, default=0.1)
    coef0_hp = UniformFloatHyperparameter("coef0", -1, 1, default=0)
    n_components_hp = UniformIntegerHyperparameter(
        "n_components", 50, 10000, default=100, log=True)

    cs = ConfigurationSpace()
    for hyperparameter in (kernel_hp, degree_hp, gamma_hp, coef0_hp,
                           n_components_hp):
        cs.add_hyperparameter(hyperparameter)

    # Kernel-specific hyperparameters are only active for kernels using them.
    kernel_conditions = (
        EqualsCondition(degree_hp, kernel_hp, "poly"),
        InCondition(coef0_hp, kernel_hp, ["poly", "sigmoid"]),
        InCondition(gamma_hp, kernel_hp, gamma_kernels),
    )
    for condition in kernel_conditions:
        cs.add_condition(condition)
    return cs
def _get_hyperparameter_search_space(self, include=None, exclude=None,
                                     dataset_properties=None):
    """Return the configuration space for the CASH problem.

    The base space comes from ``self._get_base_search_space``. Forbidden
    clauses are then added for regressor / feature preprocessor
    combinations that are not wanted. Whenever adding a clause raises
    ``ValueError``, the default of 'regressor:__choice__' is replaced by
    another available regressor and the clause is tried again.

    Parameters
    ----------
    include : optional, default=None
        Forwarded to ``self._get_base_search_space``.
    exclude : optional, default=None
        Forwarded to ``self._get_base_search_space``.
    dataset_properties : dict (optional, default=None)
        Anything that is not a dict is replaced by an empty dict.
        'target_type' is set to 'regression' and 'sparse' defaults to
        False; a dict passed in is modified in place.

    Returns
    -------
    cs : ConfigurationSpace
        The space returned by ``self._get_base_search_space`` with the
        forbidden clauses added. It is also stored in
        ``self.configuration_space_``.

    Raises
    ------
    ValueError
        If no replacement default regressor is left.
    """
    cs = ConfigurationSpace()

    if not isinstance(dataset_properties, dict):
        dataset_properties = dict()
    if dataset_properties.get('target_type') != 'regression':
        dataset_properties['target_type'] = 'regression'
    # Without further information the dataset is assumed to be dense.
    dataset_properties.setdefault('sparse', False)

    cs = self._get_base_search_space(
        cs=cs, dataset_properties=dataset_properties,
        exclude=exclude, include=include, pipeline=self.steps)

    regressor_choice = 'regressor:__choice__'
    preprocessor_choice = 'feature_preprocessor:__choice__'

    regressors = cs.get_hyperparameter(regressor_choice).choices
    preprocessors = cs.get_hyperparameter(preprocessor_choice).choices
    available_regressors = self._final_estimator.get_available_components(
        dataset_properties)

    # Every available regressor except the current default can take over
    # as default when a forbidden clause rules the current one out.
    fallback_defaults = list(available_regressors.keys())
    current_default = cs.get_hyperparameter(regressor_choice).default_value
    fallback_defaults.pop(fallback_defaults.index(current_default))

    def forbid_pair(regressor, preprocessor, swallow_key_error):
        # Forbid one regressor / feature preprocessor combination, swapping
        # the regressor default for as long as the clause is rejected with
        # a ValueError.
        while True:
            try:
                forb_reg = ForbiddenEqualsClause(
                    cs.get_hyperparameter(regressor_choice), regressor)
                forb_fpp = ForbiddenEqualsClause(
                    cs.get_hyperparameter(preprocessor_choice), preprocessor)
                cs.add_forbidden_clause(
                    ForbiddenAndConjunction(forb_reg, forb_fpp))
                return
            except KeyError:
                if swallow_key_error:
                    return
                raise
            except ValueError:
                # Change the default and try again.
                try:
                    replacement = fallback_defaults.pop()
                except IndexError:
                    raise ValueError(
                        "Cannot find a legal default configuration.")
                cs.get_hyperparameter(
                    regressor_choice).default_value = replacement

    # A regressor which can handle sparse data after the densifier is
    # forbidden for memory issues.
    for key in regressors:
        takes_sparse = SPARSE in available_regressors[key].get_properties(
            dataset_properties=None)['input']
        if takes_sparse and 'densifier' in preprocessors:
            forbid_pair(key, 'densifier', swallow_key_error=False)

    # Combinations of tree-based models with feature learning, which would
    # take too long.
    regressors_ = ["adaboost", "decision_tree", "extra_trees",
                   "gaussian_process", "gradient_boosting",
                   "k_nearest_neighbors", "random_forest",
                   "xgradient_boosting"]
    feature_learning_ = ["kitchen_sinks", "kernel_pca", "nystroem_sampler"]
    for r, f in product(regressors_, feature_learning_):
        if r in regressors and f in preprocessors:
            forbid_pair(r, f, swallow_key_error=True)

    self.configuration_space_ = cs
    self.dataset_properties_ = dataset_properties
    return cs