def test_read_new_configuration_space_forbidden(self):
    """Parsing a pcs_new string with a forbidden section must reproduce the space."""
    expected = ConfigurationSpace()
    int_hp = UniformIntegerHyperparameter('int_hp', 0, 50, 30)
    float_hp = UniformFloatHyperparameter('float_hp', 0., 50., 30.)
    cat_hp_str = CategoricalHyperparameter('cat_hp_str', ['a', 'b', 'c'], 'b')
    ord_hp_str = OrdinalHyperparameter('ord_hp_str', ['a', 'b', 'c'], 'b')
    expected.add_hyperparameters([int_hp, float_hp, cat_hp_str, ord_hp_str])
    expected.add_forbidden_clauses([
        ForbiddenAndConjunction(ForbiddenEqualsClause(int_hp, 1)),
        ForbiddenAndConjunction(ForbiddenEqualsClause(float_hp, 1.0),
                                ForbiddenEqualsClause(float_hp, 2.0)),
        ForbiddenAndConjunction(ForbiddenEqualsClause(cat_hp_str, 'a')),
        ForbiddenAndConjunction(ForbiddenEqualsClause(ord_hp_str, 'a')),
    ])
    # The textual pcs_new representation of the same space.
    pcs_lines = [
        "int_hp integer [0,50] [30]",
        "float_hp real [0.0, 50.0] [30.0]",
        "cat_hp_str categorical {a, b, c} [b]",
        "ord_hp_str ordinal {a, b, c} [b]",
        "# Forbiddens:",
        "{int_hp=1}",
        "{float_hp=1.0, float_hp=2.0}",
        "{cat_hp_str=a}",
        "{ord_hp_str=a}",
    ]
    cs_new = pcs_new.read(pcs_lines)
    self.assertEqual(cs_new, expected)
def test_write_forbidden(self):
    """Smoke test: irace.write must serialise a space containing forbidden
    clauses (it writes a file called forbidden.txt as a side effect)."""
    cs = ConfigurationSpace()
    parent = CategoricalHyperparameter("parent", [0, 1])
    child = UniformIntegerHyperparameter("child", 0, 2)
    child2 = UniformIntegerHyperparameter("child2", 0, 2)
    child3 = UniformIntegerHyperparameter("child3", 0, 2)
    child4 = CategoricalHyperparameter("child4", [4, 5, 6, 7])
    cs.add_hyperparameters([parent, child, child2, child3, child4])
    forb_parent = ForbiddenEqualsClause(parent, 1)
    forb_child = ForbiddenInClause(child, range(2, 3))
    forb_child2 = ForbiddenInClause(child2, range(2, 3))
    forb_child3 = ForbiddenInClause(child3, range(2, 3))
    forb_child4 = ForbiddenInClause(child4, [6, 7])
    cs.add_forbidden_clauses([
        forb_parent, forb_child, forb_child2, forb_child3, forb_child4,
        ForbiddenAndConjunction(forb_parent, forb_child),
        ForbiddenAndConjunction(forb_parent, forb_child2),
        ForbiddenAndConjunction(forb_parent, forb_child3),
    ])
    irace.write(cs)  # generates file called forbidden.txt
def get_hyperparameter_search_space(dataset_properties=None):
    """Build the liblinear SVR configuration space.

    Forbids dual=False together with loss='epsilon_insensitive',
    a combination liblinear does not support.
    """
    cs = ConfigurationSpace()
    C = UniformFloatHyperparameter(
        "C", 0.03125, 32768, log=True, default=1.0)
    loss = CategoricalHyperparameter(
        "loss", ["epsilon_insensitive", "squared_epsilon_insensitive"],
        default="squared_epsilon_insensitive")
    # Random Guess
    epsilon = UniformFloatHyperparameter(
        name="epsilon", lower=0.001, upper=1, default=0.1, log=True)
    dual = Constant("dual", "False")
    # These are set ad-hoc
    tol = UniformFloatHyperparameter(
        "tol", 1e-5, 1e-1, default=1e-4, log=True)
    fit_intercept = Constant("fit_intercept", "True")
    intercept_scaling = Constant("intercept_scaling", 1)
    # Preserve the original registration order.
    for hp in (C, loss, epsilon, dual, tol, fit_intercept, intercept_scaling):
        cs.add_hyperparameter(hp)
    cs.add_forbidden_clause(ForbiddenAndConjunction(
        ForbiddenEqualsClause(dual, "False"),
        ForbiddenEqualsClause(loss, "epsilon_insensitive")))
    return cs
def get_cs():
    """Return the liblinear SVR configuration space.

    dual=False with loss='epsilon_insensitive' is forbidden because
    liblinear does not support that combination.
    """
    cs = ConfigurationSpace()
    epsilon = CategoricalHyperparameter(
        "epsilon", [1e-4, 1e-3, 1e-2, 1e-1, 1], default_value=1e-4)
    loss = CategoricalHyperparameter(
        "loss", ["epsilon_insensitive", "squared_epsilon_insensitive"],
        default_value="epsilon_insensitive")
    dual = CategoricalHyperparameter(
        "dual", ['True', 'False'], default_value='True')
    tol = UniformFloatHyperparameter(
        "tol", 1e-5, 1e-1, default_value=1e-4, log=True)
    C = UniformFloatHyperparameter(
        "C", 0.03125, 32768, log=True, default_value=1.0)
    fit_intercept = Constant("fit_intercept", "True")
    intercept_scaling = Constant("intercept_scaling", 1)
    cs.add_hyperparameters(
        [epsilon, loss, dual, tol, C, fit_intercept, intercept_scaling])
    cs.add_forbidden_clause(ForbiddenAndConjunction(
        ForbiddenEqualsClause(dual, "False"),
        ForbiddenEqualsClause(loss, "epsilon_insensitive")))
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    """Build the l1-penalised LinearSVC configuration space.

    penalty='l1' with loss='hinge' is forbidden (unsupported by liblinear).
    """
    cs = ConfigurationSpace()
    penalty = Constant("penalty", "l1")
    loss = CategoricalHyperparameter(
        "loss", ["hinge", "squared_hinge"], default="squared_hinge")
    dual = Constant("dual", "False")
    # This is set ad-hoc
    tol = UniformFloatHyperparameter(
        "tol", 1e-5, 1e-1, default=1e-4, log=True)
    C = UniformFloatHyperparameter(
        "C", 0.03125, 32768, log=True, default=1.0)
    multi_class = Constant("multi_class", "ovr")
    # These are set ad-hoc
    fit_intercept = Constant("fit_intercept", "True")
    intercept_scaling = Constant("intercept_scaling", 1)
    # Same registration order as before the refactor.
    for hp in (penalty, loss, dual, tol, C, multi_class,
               fit_intercept, intercept_scaling):
        cs.add_hyperparameter(hp)
    cs.add_forbidden_clause(ForbiddenAndConjunction(
        ForbiddenEqualsClause(penalty, "l1"),
        ForbiddenEqualsClause(loss, "hinge")))
    return cs
def test_and_conjunction(self):
    """An AND-of-ANDs forbids only the single fully-matching assignment."""
    parent = CategoricalHyperparameter("parent", [0, 1])
    children = [UniformIntegerHyperparameter(name, 0, 2)
                for name in ("child", "child2", "child3")]
    forb_parent = ForbiddenEqualsClause(parent, 1)
    inner = [ForbiddenAndConjunction(forb_parent,
                                     ForbiddenInClause(child, range(2, 3)))
             for child in children]
    total_and = ForbiddenAndConjunction(*inner)
    self.assertEqual("((Forbidden: parent == 1 && Forbidden: child in {2}) "
                     "&& (Forbidden: parent == 1 && Forbidden: child2 in {2}) "
                     "&& (Forbidden: parent == 1 && Forbidden: child3 in "
                     "{2}))", str(total_and))
    # Only the last combination (1, 2, 2, 2) satisfies every clause.
    results = [False] * 53 + [True]
    for i, values in enumerate(product(range(2), range(3), range(3),
                                       range(3))):
        is_forbidden = total_and.is_forbidden(
            {"parent": values[0], "child": values[1],
             "child2": values[2], "child3": values[3]},
            True,
        )
        self.assertEqual(results[i], is_forbidden)
    # Without strict mode, missing values must not trigger the clause.
    self.assertFalse(total_and.is_forbidden({}, strict=False))
def _build_forbidden_and_conjunction(clause: ForbiddenAndConjunction) -> Dict:
    """Serialise a ForbiddenAndConjunction into its dict representation."""
    # The conjunction itself has no name; borrow the first literal clause's
    # hyperparameter name, matching the existing serialisation format.
    literal_clauses = clause.get_descendant_literal_clauses()
    serialised_components = [_build_forbidden(component)
                             for component in clause.components]
    return {
        'name': literal_clauses[0].hyperparameter.name,
        'type': 'AND',
        'clauses': serialised_components,
    }
def set_probabilities_in_cs(self, cs: ConfigurationSpace, relied2models: Dict[str, List[str]], relied2AllModels: Dict[str, List[str]], all_models: List[str], **kwargs):
    """Adjust estimator-choice probabilities and add forbidden clauses.

    Distributes the probability mass given in ``kwargs`` (one entry per
    "relied-on" model group) across the estimator choices, picks a default
    estimator from the relied-on models, and forbids combinations where a
    relied-on sub-choice is selected together with an estimator outside
    its supported set.

    Parameters
    ----------
    cs : ConfigurationSpace
        Space containing an ``estimator:__choice__`` hyperparameter.
    relied2models : Dict[str, List[str]]
        Maps each relied-on model name to the estimator choices it covers.
    relied2AllModels : Dict[str, List[str]]
        Maps each relied-on model name to every estimator it supports.
    all_models : List[str]
        All available estimator choice names.
    **kwargs
        Probability mass per relied-on model name.
    """
    estimator = cs.get_hyperparameter("estimator:__choice__")
    probabilities = []
    model2prob = {}
    L = 0
    # Split each relied-on model's mass uniformly over its covered models.
    for rely_model in relied2models:
        cur_models = relied2models[rely_model]
        L += len(cur_models)
        for model in cur_models:
            model2prob[model] = kwargs[rely_model] / len(cur_models)
    # The remaining mass is shared uniformly by all other models.
    p_rest = (1 - sum(model2prob.values())) / (len(all_models) - L)
    for model in estimator.choices:
        probabilities.append(model2prob.get(model, p_rest))
    estimator.probabilities = probabilities
    default_estimator_choice = None
    for models in relied2models.values():
        if models:
            default_estimator_choice = models[0]
    estimator.default_value = default_estimator_choice
    for rely_model, path in RelyModels.info:
        forbid_eq_value = path[-1]
        path = path[:-1]
        forbid_eq_key = ":".join(path + ["__choice__"])
        forbid_eq_key_hp = cs.get_hyperparameter(forbid_eq_key)
        forbid_in_key = "estimator:__choice__"
        hit = relied2AllModels.get(rely_model)
        if not hit:
            # No estimator supports this choice: drop it entirely and
            # redistribute its probability uniformly.
            choices = list(forbid_eq_key_hp.choices)
            choices.remove(forbid_eq_value)
            forbid_eq_key_hp.choices = tuple(choices)
            forbid_eq_key_hp.default_value = choices[0]
            forbid_eq_key_hp.probabilities = [1 / len(choices)] * len(choices)
            # FIXME: in the end I gave up modifying this here and did the
            # preprocessing in the HDL part instead.
            continue
        forbid_in_value = list(set(all_models) - set(hit))
        # Only boosting models were selected
        if not forbid_in_value:
            continue
        choices = forbid_eq_key_hp.choices
        probabilities = []
        p: float = kwargs[rely_model]
        # BUGFIX: was `(1 - p) * (len(choices) - 1)`, which makes the
        # probabilities sum to more than 1. The remaining mass must be
        # divided among the other choices (consistent with the divisions
        # above).
        p_rest = (1 - p) / (len(choices) - 1)
        for choice in choices:
            if choice == forbid_eq_value:
                probabilities.append(p)
            else:
                probabilities.append(p_rest)
        forbid_eq_key_hp.probabilities = probabilities
        # Forbid selecting this sub-choice with an unsupported estimator.
        cs.add_forbidden_clause(
            ForbiddenAndConjunction(
                ForbiddenEqualsClause(forbid_eq_key_hp, forbid_eq_value),
                ForbiddenInClause(cs.get_hyperparameter(forbid_in_key),
                                  forbid_in_value),
            ))
def _build_forbidden_and_conjunction(clause: ForbiddenAndConjunction) -> Dict:
    """Convert a ForbiddenAndConjunction into a serialisable dict."""
    # Use the first literal descendant's hyperparameter name as the label.
    name = clause.get_descendant_literal_clauses()[0].hyperparameter.name
    result = dict(name=name, type='AND')
    result['clauses'] = [_build_forbidden(part) for part in clause.components]
    return result
def get_hyperparameter_search_space(dataset_properties=None):
    """Build the LinearSVC configuration space.

    Encodes liblinear's unsupported combinations as forbidden clauses:
    (l1, hinge), (dual=False, l2, hinge) and (dual=False, l1).
    """
    cs = ConfigurationSpace()
    penalty = CategoricalHyperparameter(
        "penalty", ["l1", "l2"], default_value="l2")
    loss = CategoricalHyperparameter(
        "loss", ["hinge", "squared_hinge"], default_value="squared_hinge")
    dual = Constant("dual", "False")
    # This is set ad-hoc
    tol = UniformFloatHyperparameter(
        "tol", 1e-5, 1e-1, default_value=1e-4, log=True)
    C = UniformFloatHyperparameter(
        "C", 0.03125, 32768, log=True, default_value=1.0)
    multi_class = Constant("multi_class", "ovr")
    # These are set ad-hoc
    fit_intercept = Constant("fit_intercept", "True")
    intercept_scaling = Constant("intercept_scaling", 1)
    cs.add_hyperparameters([
        penalty, loss, dual, tol, C, multi_class, fit_intercept,
        intercept_scaling
    ])
    forbidden_combinations = (
        ForbiddenAndConjunction(
            ForbiddenEqualsClause(penalty, "l1"),
            ForbiddenEqualsClause(loss, "hinge")),
        ForbiddenAndConjunction(
            ForbiddenEqualsClause(dual, "False"),
            ForbiddenEqualsClause(penalty, "l2"),
            ForbiddenEqualsClause(loss, "hinge")),
        ForbiddenAndConjunction(
            ForbiddenEqualsClause(dual, "False"),
            ForbiddenEqualsClause(penalty, "l1")),
    )
    for clause in forbidden_combinations:
        cs.add_forbidden_clause(clause)
    return cs
def test_build_new_forbidden(self):
    """pcs_new.write must expand a forbidden IN-conjunction into all
    pairwise equality clauses."""
    a = CategoricalHyperparameter("a", ["a", "b", "c"], "a")
    b = CategoricalHyperparameter("b", ["a", "b", "c"], "c")
    cs = ConfigurationSpace()
    cs.add_hyperparameter(a)
    cs.add_hyperparameter(b)
    cs.add_forbidden_clause(ForbiddenAndConjunction(
        ForbiddenInClause(a, ["a", "b"]),
        ForbiddenInClause(b, ["a", "b"])))
    expected = "a categorical {a, b, c} [a]\nb categorical {a, b, c} [c]\n\n" \
               "{a=a, b=a}\n{a=a, b=b}\n{a=b, b=a}\n{a=b, b=b}\n"
    self.assertEqual(expected, pcs_new.write(cs))
def get_hyperparameter_search_space(**kwargs):
    """Build the feature-agglomeration configuration space.

    Ward linkage only works with euclidean affinity, so the
    (manhattan|cosine, ward) combination is forbidden.
    """
    cs = ConfigurationSpace()
    n_clusters_factor = UniformFloatHyperparameter(
        "n_clusters_factor", 0., 1., default_value=1.)
    affinity = CategoricalHyperparameter(
        "affinity", ["euclidean", "manhattan", "cosine"],
        default_value="euclidean")
    linkage = CategoricalHyperparameter(
        "linkage", ["ward", "complete", "average"], default_value="ward")
    pooling_func = CategoricalHyperparameter(
        "pooling_func", ["mean", "median", "max"], default_value="mean")
    cs.add_hyperparameters(
        [n_clusters_factor, affinity, linkage, pooling_func])
    cs.add_forbidden_clause(ForbiddenAndConjunction(
        ForbiddenInClause(affinity, ["manhattan", "cosine"]),
        ForbiddenEqualsClause(linkage, "ward")))
    return cs
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
    """Build the l1 LinearSVC search space for the given optimizer.

    Parameters
    ----------
    dataset_properties : dict, optional
        Unused; kept for interface compatibility.
    optimizer : str
        Either 'smac' (returns a ConfigurationSpace) or 'tpe'
        (returns a hyperopt space dict).

    Returns
    -------
    ConfigurationSpace or dict
        The search space for the selected optimizer.
    """
    if optimizer == 'smac':
        cs = ConfigurationSpace()
        penalty = Constant("penalty", "l1")
        loss = CategoricalHyperparameter(
            "loss", ["hinge", "squared_hinge"],
            default_value="squared_hinge")
        dual = Constant("dual", "False")
        # This is set ad-hoc
        tol = UniformFloatHyperparameter(
            "tol", 1e-5, 1e-1, default_value=1e-4, log=True)
        C = UniformFloatHyperparameter(
            "C", 0.03125, 32768, log=True, default_value=1.0)
        multi_class = Constant("multi_class", "ovr")
        # These are set ad-hoc
        fit_intercept = Constant("fit_intercept", "True")
        intercept_scaling = Constant("intercept_scaling", 1)
        cs.add_hyperparameters([
            penalty, loss, dual, tol, C, multi_class, fit_intercept,
            intercept_scaling
        ])
        # liblinear does not support penalty='l1' with loss='hinge'.
        penalty_and_loss = ForbiddenAndConjunction(
            ForbiddenEqualsClause(penalty, "l1"),
            ForbiddenEqualsClause(loss, "hinge"))
        cs.add_forbidden_clause(penalty_and_loss)
        return cs
    elif optimizer == 'tpe':
        from hyperopt import hp
        space = {
            'tol': hp.loguniform('lbs_tol', np.log(1e-5), np.log(1e-1)),
            'C': hp.loguniform('lbs_C', np.log(0.03125), np.log(32768)),
            'loss': 'squared_hinge',
            'multi_class': 'ovr',
            'dual': 'False',
            'fit_intercept': 'True',
            'intercept_scaling': 1,
            # BUGFIX: was 'L1'; sklearn expects the lowercase 'l1'
            # (matching the smac branch above).
            'penalty': 'l1'
        }
        return space
def __forbidden(self, value: List, store: Dict, cs: ConfigurationSpace):
    """Translate a list of forbidden-combination dicts into ConfigSpace
    forbidden clauses and register them on *cs*.

    Each dict maps a hyperparameter name (a key of *store*) to either a
    single value or a list of values; all entries of one dict are ANDed.
    """
    assert isinstance(value, list)
    for item in value:
        assert isinstance(item, dict)
        clauses = []
        for hp_name, raw in item.items():
            # Unwrap single-element lists so they become equality clauses.
            if isinstance(raw, list) and len(raw) == 1:
                raw = raw[0]
            if isinstance(raw, list):
                encoded = [smac_hdl._encode(v) for v in raw]
                clauses.append(ForbiddenInClause(store[hp_name], encoded))
            else:
                clauses.append(
                    ForbiddenEqualsClause(store[hp_name],
                                          smac_hdl._encode(raw)))
        cs.add_forbidden_clause(ForbiddenAndConjunction(*clauses))
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
    """Build the feature-agglomeration configuration space.

    Ward linkage requires euclidean affinity, so (manhattan|cosine, ward)
    is forbidden.
    """
    cs = ConfigurationSpace()
    n_clusters = UniformIntegerHyperparameter(
        "n_clusters", 2, 400, default_value=25)
    affinity = CategoricalHyperparameter(
        "affinity", ["euclidean", "manhattan", "cosine"],
        default_value="euclidean")
    linkage = CategoricalHyperparameter(
        "linkage", ["ward", "complete", "average"], default_value="ward")
    pooling_func = CategoricalHyperparameter(
        "pooling_func", ["mean", "median", "max"], default_value="mean")
    cs.add_hyperparameters([n_clusters, affinity, linkage, pooling_func])
    cs.add_forbidden_clause(ForbiddenAndConjunction(
        ForbiddenInClause(affinity, ["manhattan", "cosine"]),
        ForbiddenEqualsClause(linkage, "ward")))
    return cs
def test_and_conjunction(self):
    """Nested AND conjunctions forbid exactly one full assignment."""
    hp1 = CategoricalHyperparameter("parent", [0, 1])
    hp2 = UniformIntegerHyperparameter("child", 0, 2)
    hp3 = UniformIntegerHyperparameter("child2", 0, 2)
    hp4 = UniformIntegerHyperparameter("child3", 0, 2)
    forb2 = ForbiddenEqualsClause(hp1, 1)
    forb3 = ForbiddenInClause(hp2, range(2, 3))
    forb4 = ForbiddenInClause(hp3, range(2, 3))
    forb5 = ForbiddenInClause(hp4, range(2, 3))
    total_and = ForbiddenAndConjunction(
        ForbiddenAndConjunction(forb2, forb3),
        ForbiddenAndConjunction(forb2, forb4),
        ForbiddenAndConjunction(forb2, forb5),
    )
    self.assertEqual(
        "((Forbidden: parent == 1 && Forbidden: child in {2}) "
        "&& (Forbidden: parent == 1 && Forbidden: child2 in {2}) "
        "&& (Forbidden: parent == 1 && Forbidden: child3 in "
        "{2}))", str(total_and))
    # Of the 2*3*3*3 = 54 combinations, only the last one is forbidden.
    expected = [False] * 53 + [True]
    combos = product(range(2), range(3), range(3), range(3))
    for want, values in zip(expected, combos):
        got = total_and.is_forbidden(
            {
                "parent": values[0],
                "child": values[1],
                "child2": values[2],
                "child3": values[3]
            },
            True,
        )
        self.assertEqual(want, got)
    self.assertFalse(total_and.is_forbidden({}, strict=False))
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
    """Build the liblinear SVR search space for the given optimizer.

    'smac' returns a ConfigurationSpace (with the unsupported
    dual=False/epsilon_insensitive combination forbidden); 'tpe' returns
    a hyperopt space dict that encodes the same constraint via choices.
    """
    if optimizer == 'smac':
        cs = ConfigurationSpace()
        epsilon = CategoricalHyperparameter(
            "epsilon", [1e-4, 1e-3, 1e-2, 1e-1, 1], default_value=1e-4)
        loss = CategoricalHyperparameter(
            "loss", ["epsilon_insensitive", "squared_epsilon_insensitive"],
            default_value="epsilon_insensitive")
        dual = CategoricalHyperparameter(
            "dual", ['True', 'False'], default_value='True')
        tol = UniformFloatHyperparameter(
            "tol", 1e-5, 1e-1, default_value=1e-4, log=True)
        C = UniformFloatHyperparameter(
            "C", 0.03125, 32768, log=True, default_value=1.0)
        fit_intercept = Constant("fit_intercept", "True")
        intercept_scaling = Constant("intercept_scaling", 1)
        cs.add_hyperparameters(
            [epsilon, loss, dual, tol, C, fit_intercept, intercept_scaling])
        cs.add_forbidden_clause(ForbiddenAndConjunction(
            ForbiddenEqualsClause(dual, "False"),
            ForbiddenEqualsClause(loss, "epsilon_insensitive")))
        return cs
    elif optimizer == 'tpe':
        from hyperopt import hp
        # Valid (loss, dual) pairs are enumerated directly.
        space = {
            'loss': hp.choice('liblinear_combination', [
                {'loss': "epsilon_insensitive", 'dual': "True"},
                {'loss': "squared_epsilon_insensitive", 'dual': "True"},
                {'loss': "squared_epsilon_insensitive", 'dual': "False"},
            ]),
            'dual': None,
            'tol': hp.loguniform('liblinear_tol', np.log(1e-5),
                                 np.log(1e-1)),
            'C': hp.loguniform('liblinear_C', np.log(0.03125),
                               np.log(32768)),
            'fit_intercept': hp.choice('liblinear_fit_intercept', ["True"]),
            'intercept_scaling': hp.choice('liblinear_intercept_scaling',
                                           [1]),
        }
        init_trial = {
            'loss': {'loss': "epsilon_insensitive", 'dual': "True"},
            'tol': 1e-4,
            'C': 1,
            'fit_intercept': "True",
            'intercept_scaling': 1,
        }
        return space
def get_hyperparameter_search_space(dataset_properties=None):
    """Build the feature-agglomeration configuration space (legacy API).

    Forbids ward linkage with non-euclidean affinities.
    """
    cs = ConfigurationSpace()
    n_clusters = UniformIntegerHyperparameter("n_clusters", 2, 400, 25)
    affinity = CategoricalHyperparameter(
        "affinity", ["euclidean", "manhattan", "cosine"], "euclidean")
    linkage = CategoricalHyperparameter(
        "linkage", ["ward", "complete", "average"], "ward")
    pooling_func = CategoricalHyperparameter(
        "pooling_func", ["mean", "median", "max"])
    # add_hyperparameter is called per-parameter, preserving order.
    for hp in (n_clusters, affinity, linkage, pooling_func):
        cs.add_hyperparameter(hp)
    cs.add_forbidden_clause(ForbiddenAndConjunction(
        ForbiddenInClause(affinity, ["manhattan", "cosine"]),
        ForbiddenEqualsClause(linkage, "ward")))
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    """Build the liblinear SVR configuration space.

    Forbids dual=False with loss='epsilon_insensitive' (unsupported).
    """
    cs = ConfigurationSpace()
    loss = CategoricalHyperparameter(
        "loss", ["epsilon_insensitive", "squared_epsilon_insensitive"],
        default_value="epsilon_insensitive")
    dual = CategoricalHyperparameter(
        "dual", ['True', 'False'], default_value='True')
    # This is set ad-hoc
    tol = UniformFloatHyperparameter(
        "tol", 1e-5, 1e-1, default_value=1e-4, log=True)
    C = UniformFloatHyperparameter(
        "C", 0.03125, 32768, log=True, default_value=1.0)
    # These are set ad-hoc
    fit_intercept = Constant("fit_intercept", "True")
    intercept_scaling = Constant("intercept_scaling", 1)
    cs.add_hyperparameters(
        [loss, dual, tol, C, fit_intercept, intercept_scaling])
    cs.add_forbidden_clause(ForbiddenAndConjunction(
        ForbiddenEqualsClause(dual, "False"),
        ForbiddenEqualsClause(loss, "epsilon_insensitive")))
    return cs
def write(configuration_space):
    """
    Writes a configurations space to file in pcs_new format.

    Parameters
    ----------
    configuration_space : :py:class:`~ConfigSpace.configuration_space.ConfigurationSpace`
        a configuration space

    Returns
    -------
    str
        The string representation of the configuration space
    """
    if not isinstance(configuration_space, ConfigurationSpace):
        raise TypeError("pcs_parser.write expects an instance of %s, "
                        "you provided '%s'" % (ConfigurationSpace,
                                               type(configuration_space)))

    # Accumulate the three output sections separately and join at the end.
    param_lines = StringIO()
    condition_lines = StringIO()
    forbidden_lines = []
    for hyperparameter in configuration_space.get_hyperparameters():
        # Check if the hyperparameter names are valid SMAC names!
        try:
            pp_param_name.parseString(hyperparameter.name)
        except pyparsing.ParseException:
            raise ValueError("Illegal hyperparameter name for SMAC: %s" %
                             hyperparameter.name)

        # First build params
        # tell() > 0 means at least one line was already written, so a
        # separator newline is needed.
        if param_lines.tell() > 0:
            param_lines.write("\n")
        if isinstance(hyperparameter, NumericalHyperparameter):
            param_lines.write(build_continuous(hyperparameter))
        elif isinstance(hyperparameter, CategoricalHyperparameter):
            param_lines.write(build_categorical(hyperparameter))
        elif isinstance(hyperparameter, OrdinalHyperparameter):
            param_lines.write(build_ordinal(hyperparameter))
        elif isinstance(hyperparameter, Constant):
            param_lines.write(build_constant(hyperparameter))
        else:
            raise TypeError("Unknown type: %s (%s)" % (
                type(hyperparameter), hyperparameter))

    for condition in configuration_space.get_conditions():
        if condition_lines.tell() > 0:
            condition_lines.write("\n")
        if isinstance(condition, AndConjunction) or isinstance(
                condition, OrConjunction):
            condition_lines.write(build_conjunction(condition))
        else:
            condition_lines.write(build_condition(condition))

    for forbidden_clause in configuration_space.get_forbiddens():
        # Convert in-statement into two or more equals statements
        dlcs = forbidden_clause.get_descendant_literal_clauses()
        # First, get all in statements and convert them to equal statements
        in_statements = []
        other_statements = []
        for dlc in dlcs:
            if isinstance(dlc, MultipleValueForbiddenClause):
                if not isinstance(dlc, ForbiddenInClause):
                    raise ValueError("SMAC cannot handle this forbidden "
                                     "clause: %s" % dlc)
                # One ForbiddenEqualsClause per value of the IN clause.
                in_statements.append([
                    ForbiddenEqualsClause(dlc.hyperparameter, value)
                    for value in dlc.values
                ])
            else:
                other_statements.append(dlc)
        # Second, create the product of all elements in the IN statements,
        # create a ForbiddenAnd and add all ForbiddenEquals
        if len(in_statements) > 0:
            # Each combination of IN-values becomes its own AND conjunction.
            for i, p in enumerate(product(*in_statements)):
                all_forbidden_clauses = list(p) + other_statements
                f = ForbiddenAndConjunction(*all_forbidden_clauses)
                forbidden_lines.append(build_forbidden(f))
        else:
            forbidden_lines.append(build_forbidden(forbidden_clause))

    # Append the conditions section, if any, after a blank line.
    if condition_lines.tell() > 0:
        condition_lines.seek(0)
        param_lines.write("\n\n")
        for line in condition_lines:
            param_lines.write(line)

    # Append the (sorted) forbidden section, one clause per line.
    if len(forbidden_lines) > 0:
        forbidden_lines.sort()
        param_lines.write("\n\n")
        for line in forbidden_lines:
            param_lines.write(line)
            param_lines.write("\n")

    # Check if the default configuration is a valid configuration!
    param_lines.seek(0)
    return param_lines.getvalue()
def read(pcs_string, debug=False):
    """
    Reads in a
    :py:class:`~ConfigSpace.configuration_space.ConfigurationSpace`
    definition from a pcs file.

    Example
    -------
    >>> from ConfigSpace.read_and_write import pcs_new
    >>> with open('configspace.pcs', 'r') as fh:
    >>>     restored_conf = pcs_new.read(fh)

    Parameters
    ----------
    pcs_string : str
        ConfigSpace definition in pcs format
    debug : bool
        Provides debug information. Defaults to False.

    Returns
    -------
    :py:class:`~ConfigSpace.configuration_space.ConfigurationSpace`
        The restored ConfigurationSpace object
    """
    configuration_space = ConfigurationSpace()
    conditions = []
    forbidden = []

    # some statistics
    ct = 0
    cont_ct = 0
    cat_ct = 0
    ord_ct = 0
    line_ct = 0

    # Pass 1: parse hyperparameter lines, collect condition and forbidden
    # lines as raw strings for later passes.
    for line in pcs_string:
        line_ct += 1

        if "#" in line:
            # It contains a comment
            pos = line.find("#")
            line = line[:pos]

        # Remove quotes and whitespaces at beginning and end
        line = line.replace('"', "").replace("'", "")
        line = line.strip()
        if "|" in line:
            # It's a condition
            try:
                c = pp_condition.parseString(line)
                conditions.append(c)
            except pyparsing.ParseException:
                raise NotImplementedError("Could not parse condition: %s" %
                                          line)
            continue
        # A parameter or forbidden line always contains "}" or "]".
        if "}" not in line and "]" not in line:
            continue
        if line.startswith("{") and line.endswith("}"):
            forbidden.append(line)
            continue
        if len(line.strip()) == 0:
            continue

        ct += 1
        param = None

        create = {
            "int": UniformIntegerHyperparameter,
            "float": UniformFloatHyperparameter,
            "categorical": CategoricalHyperparameter,
            "ordinal": OrdinalHyperparameter
        }

        # Try numerical first; on parse failure fall through to
        # categorical/ordinal below.
        try:
            param_list = pp_cont_param.parseString(line)
            name = param_list[0]
            if param_list[1] == 'integer':
                paramtype = 'int'
            elif param_list[1] == 'real':
                paramtype = 'float'
            else:
                paramtype = None

            if paramtype in ['int', 'float']:
                # Tokens past index 10 hold the optional "log" flag.
                log = param_list[10:]
                param_list = param_list[:10]
                if len(log) > 0:
                    log = log[0]
                lower = float(param_list[3])
                upper = float(param_list[5])
                log_on = True if "log" in log else False
                default_value = float(param_list[8])
                param = create[paramtype](name=name,
                                          lower=lower,
                                          upper=upper,
                                          q=None,
                                          log=log_on,
                                          default_value=default_value)
                cont_ct += 1

        except pyparsing.ParseException:
            pass

        try:
            if "categorical" in line:
                param_list = pp_cat_param.parseString(line)
                name = param_list[0]
                # Choices are every other token between braces.
                choices = [choice for choice in param_list[3:-4:2]]
                default_value = param_list[-2]
                param = create["categorical"](name=name,
                                              choices=choices,
                                              default_value=default_value)
                cat_ct += 1
            elif "ordinal" in line:
                param_list = pp_ord_param.parseString(line)
                name = param_list[0]
                sequence = [seq for seq in param_list[3:-4:2]]
                default_value = param_list[-2]
                param = create["ordinal"](name=name,
                                          sequence=sequence,
                                          default_value=default_value)
                ord_ct += 1
        except pyparsing.ParseException:
            pass

        if param is None:
            raise NotImplementedError("Could not parse: %s" % line)

        configuration_space.add_hyperparameter(param)

    # Pass 2: turn each "{a=x, b=y, ...}" line into an AND conjunction of
    # equality clauses. Tokens are consumed in groups of three (name, op,
    # value).
    for clause in forbidden:
        param_list = pp_forbidden_clause.parseString(clause)
        tmp_list = []
        clause_list = []
        for value in param_list[1:]:
            if len(tmp_list) < 3:
                tmp_list.append(value)
            else:
                # So far, only equals is supported by SMAC
                if tmp_list[1] == '=':
                    # TODO maybe add a check if the hyperparameter is
                    # actually in the configuration space
                    clause_list.append(
                        ForbiddenEqualsClause(
                            configuration_space.get_hyperparameter(
                                tmp_list[0]), tmp_list[2]))
                else:
                    raise NotImplementedError()
                tmp_list = []
        configuration_space.add_forbidden_clause(
            ForbiddenAndConjunction(*clause_list))

    # Pass 3: group conditions by child and build (possibly nested)
    # conjunctions.
    conditions_per_child = OrderedDict()
    for condition in conditions:
        child_name = condition[0]
        if child_name not in conditions_per_child:
            conditions_per_child[child_name] = list()
        conditions_per_child[child_name].append(condition)
    for child_name in conditions_per_child:
        for condition in conditions_per_child[child_name]:
            # Drop the "<child> |" prefix and re-join the tokens.
            condition = condition[2:]
            condition = ' '.join(condition)
            if '||' in str(condition):
                ors = []
                # 1st case we have a mixture of || and &&
                if '&&' in str(condition):
                    ors_combis = []
                    for cond_parts in str(condition).split('||'):
                        condition = str(cond_parts).split('&&')
                        # if length is 1 it must be or
                        if len(condition) == 1:
                            element_list = condition[0].split()
                            ors_combis.append(
                                condition_specification(
                                    child_name, element_list,
                                    configuration_space))
                        else:
                            # now taking care of ands
                            ands = []
                            for and_part in condition:
                                # NOTE(review): the outer `for part in
                                # condition` looks wrong — it repeats
                                # and_part.split() len(condition) times and
                                # `part` is unused; presumably this was
                                # meant to be just `and_part.split()`.
                                # Verify against upstream pcs_new.
                                element_list = [
                                    element for part in condition
                                    for element in and_part.split()
                                ]
                                ands.append(
                                    condition_specification(
                                        child_name, element_list,
                                        configuration_space))
                            ors_combis.append(AndConjunction(*ands))
                    mixed_conjunction = OrConjunction(*ors_combis)
                    configuration_space.add_condition(mixed_conjunction)
                else:
                    # 2nd case: we only have ors
                    for cond_parts in str(condition).split('||'):
                        element_list = [
                            element for element in cond_parts.split()
                        ]
                        ors.append(
                            condition_specification(child_name, element_list,
                                                    configuration_space))
                    or_conjunction = OrConjunction(*ors)
                    configuration_space.add_condition(or_conjunction)
            else:
                # 3rd case: we only have ands
                if '&&' in str(condition):
                    ands = []
                    for cond_parts in str(condition).split('&&'):
                        element_list = [
                            element for element in cond_parts.split()
                        ]
                        ands.append(
                            condition_specification(child_name, element_list,
                                                    configuration_space))
                    and_conjunction = AndConjunction(*ands)
                    configuration_space.add_condition(and_conjunction)
                else:
                    # 4th case: we have a normal condition
                    element_list = [element for element in condition.split()]
                    normal_condition = condition_specification(
                        child_name, element_list, configuration_space)
                    configuration_space.add_condition(normal_condition)
    return configuration_space
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
    """Build the LinearSVC search space for the given optimizer.

    'smac' returns a ConfigurationSpace with liblinear's unsupported
    (penalty, loss, dual) combinations forbidden; 'tpe' returns a
    hyperopt dict where only the valid combinations are enumerated.
    """
    if optimizer == 'smac':
        cs = ConfigurationSpace()
        penalty = CategoricalHyperparameter(
            "penalty", ["l1", "l2"], default_value="l2")
        loss = CategoricalHyperparameter(
            "loss", ["hinge", "squared_hinge"],
            default_value="squared_hinge")
        dual = CategoricalHyperparameter(
            "dual", ['True', 'False'], default_value='True')
        # This is set ad-hoc
        tol = UniformFloatHyperparameter(
            "tol", 1e-5, 1e-1, default_value=1e-4, log=True)
        C = UniformFloatHyperparameter(
            "C", 0.03125, 32768, log=True, default_value=1.0)
        multi_class = Constant("multi_class", "ovr")
        # These are set ad-hoc
        fit_intercept = Constant("fit_intercept", "True")
        intercept_scaling = Constant("intercept_scaling", 1)
        cs.add_hyperparameters([
            penalty, loss, dual, tol, C, multi_class, fit_intercept,
            intercept_scaling
        ])
        forbidden_combinations = (
            ForbiddenAndConjunction(
                ForbiddenEqualsClause(penalty, "l1"),
                ForbiddenEqualsClause(loss, "hinge")),
            ForbiddenAndConjunction(
                ForbiddenEqualsClause(dual, "False"),
                ForbiddenEqualsClause(penalty, "l2"),
                ForbiddenEqualsClause(loss, "hinge")),
            ForbiddenAndConjunction(
                ForbiddenEqualsClause(dual, "True"),
                ForbiddenEqualsClause(penalty, "l1")),
        )
        for clause in forbidden_combinations:
            cs.add_forbidden_clause(clause)
        return cs
    elif optimizer == 'tpe':
        # Only the valid (penalty, loss, dual) triples are offered.
        space = {
            'penalty': hp.choice('liblinear_combination', [
                {'penalty': "l1", 'loss': "squared_hinge",
                 'dual': "False"},
                {'penalty': "l2", 'loss': "hinge", 'dual': "True"},
                {'penalty': "l2", 'loss': "squared_hinge",
                 'dual': "True"},
                {'penalty': "l2", 'loss': "squared_hinge",
                 'dual': "False"},
            ]),
            'loss': None,
            'dual': None,
            'tol': hp.loguniform('liblinear_tol', np.log(1e-5),
                                 np.log(1e-1)),
            'C': hp.loguniform('liblinear_C', np.log(0.03125),
                               np.log(32768)),
            'multi_class': hp.choice('liblinear_multi_class', ["ovr"]),
            'fit_intercept': hp.choice('liblinear_fit_intercept', ["True"]),
            'intercept_scaling': hp.choice('liblinear_intercept_scaling',
                                           [1])
        }
        init_trial = {
            'penalty': {'penalty': "l2", 'loss': "squared_hinge",
                        'dual': "True"},
            'tol': 1e-4,
            'C': 1,
            'multiclass': "ovr",
            'fit_intercept': "True",
            'intercept_scaling': 1
        }
        return space
def _get_hyperparameter_search_space(self, include=None, exclude=None,
                                     dataset_properties=None):
    """Return the configuration space for the CASH problem.

    Parameters
    ----------
    include : dict of str -> list of str, optional
        Per-step whitelist of components; only the listed components of
        each pipeline step are used. Cannot be combined with
        :python:`exclude` for the same step.

    exclude : dict of str -> list of str, optional
        Per-step blacklist of components; all components of each pipeline
        step except the listed ones are used. Cannot be combined with
        :python:`include` for the same step.

    dataset_properties : dict, optional
        Properties of the dataset (e.g. sparsity, target type) used to
        filter applicable components. Defaults to an empty dict with
        target_type forced to 'regression'.

    Returns
    -------
    cs : ConfigSpace.configuration_space.Configuration
        The configuration space describing the SimpleRegressionClassifier.
    """
    cs = ConfigurationSpace()

    if dataset_properties is None or not isinstance(dataset_properties, dict):
        dataset_properties = dict()
    if not 'target_type' in dataset_properties:
        dataset_properties['target_type'] = 'regression'
    if dataset_properties['target_type'] != 'regression':
        dataset_properties['target_type'] = 'regression'

    if 'sparse' not in dataset_properties:
        # This dataset is probaby dense
        dataset_properties['sparse'] = False

    cs = self._get_base_search_space(
        cs=cs, dataset_properties=dataset_properties,
        exclude=exclude, include=include, pipeline=self.steps)

    regressors = cs.get_hyperparameter('regressor:__choice__').choices
    preprocessors = cs.get_hyperparameter('feature_preprocessor:__choice__').choices
    available_regressors = self._final_estimator.get_available_components(
        dataset_properties)

    # Candidates to fall back on when a forbidden clause would invalidate
    # the current default configuration.
    possible_default_regressor = copy.copy(list(
        available_regressors.keys()))
    default = cs.get_hyperparameter('regressor:__choice__').default_value
    del possible_default_regressor[
        possible_default_regressor.index(default)]

    # A regressor which can handle sparse data after the densifier is
    # forbidden for memory issues
    for key in regressors:
        if SPARSE in available_regressors[key].get_properties(dataset_properties=None)['input']:
            if 'densifier' in preprocessors:
                # add_forbidden_clause raises ValueError when the clause
                # forbids the current default; retry with a new default
                # until one is legal or candidates run out.
                while True:
                    try:
                        cs.add_forbidden_clause(
                            ForbiddenAndConjunction(
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(
                                        'regressor:__choice__'), key),
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(
                                        'feature_preprocessor:__choice__'), 'densifier')
                            ))
                        # Success
                        break
                    except ValueError:
                        # Change the default and try again
                        try:
                            default = possible_default_regressor.pop()
                        except IndexError:
                            raise ValueError(
                                "Cannot find a legal default configuration.")
                        cs.get_hyperparameter(
                            'regressor:__choice__').default_value = default

    # which would take too long
    # Combinations of tree-based models with feature learning:
    regressors_ = ["adaboost", "decision_tree", "extra_trees",
                   "gaussian_process", "gradient_boosting",
                   "k_nearest_neighbors", "random_forest", "xgradient_boosting"]
    feature_learning_ = ["kitchen_sinks", "kernel_pca", "nystroem_sampler"]

    for r, f in product(regressors_, feature_learning_):
        if r not in regressors:
            continue
        if f not in preprocessors:
            continue
        # Same retry-with-new-default pattern as above.
        while True:
            try:
                cs.add_forbidden_clause(ForbiddenAndConjunction(
                    ForbiddenEqualsClause(cs.get_hyperparameter(
                        "regressor:__choice__"), r),
                    ForbiddenEqualsClause(cs.get_hyperparameter(
                        "feature_preprocessor:__choice__"), f)))
                break
            except KeyError:
                break
            except ValueError:
                # Change the default and try again
                try:
                    default = possible_default_regressor.pop()
                except IndexError:
                    raise ValueError(
                        "Cannot find a legal default configuration.")
                cs.get_hyperparameter(
                    'regressor:__choice__').default_value = default

    self.configuration_space_ = cs
    self.dataset_properties_ = dataset_properties
    return cs
def add_forbidden(conf_space, pipeline, matches, dataset_properties, include, exclude):
    """Add forbidden clauses to ``conf_space`` for illegal component combinations.

    Walks chains of consecutive "choice" nodes in ``pipeline`` and, for every
    combination of choices whose entries in the binary ``matches`` array sum to
    zero (i.e. no legal pipeline contains that combination), adds a
    ForbiddenAndConjunction of ForbiddenEqualsClauses over the involved
    ``<node>:__choice__`` hyperparameters.

    Parameters
    ----------
    conf_space : ConfigurationSpace
        Space to mutate in place (also returned).
    pipeline : list of (name, node) tuples
    matches : np.ndarray
        Binary array with one axis per pipeline step; a zero region marks
        illegal choice combinations.
    dataset_properties : dict
        Forwarded to each node's ``get_available_components``.
    include, exclude : dict or None
        Per-node component filters.

    Returns
    -------
    ConfigurationSpace
        The same ``conf_space`` object, with forbidden clauses added.
    """
    # Not sure if this works for 3D
    node_i_is_choice = []
    node_i_choices_names = []
    node_i_choices = []
    all_nodes = []
    for node_name, node in pipeline:
        all_nodes.append(node)
        # A "choice" node offers several interchangeable components.
        is_choice = hasattr(node, "get_available_components")
        node_i_is_choice.append(is_choice)

        node_include = include.get(
            node_name) if include is not None else None
        node_exclude = exclude.get(
            node_name) if exclude is not None else None

        if is_choice:
            node_i_choices_names.append(node.get_available_components(
                dataset_properties, include=node_include,
                exclude=node_exclude).keys())
            node_i_choices.append(node.get_available_components(
                dataset_properties, include=node_include,
                exclude=node_exclude).values())
        else:
            # Non-choice nodes behave like a single fixed "choice".
            node_i_choices_names.append([node_name])
            node_i_choices.append([node])

    # Find out all chains of choices. Only in such a chain its possible to
    # have several forbidden constraints
    choices_chains = []
    idx = 0
    while idx < len(pipeline):
        if node_i_is_choice[idx]:
            chain_start = idx
            idx += 1
            while idx < len(pipeline) and node_i_is_choice[idx]:
                idx += 1
            chain_stop = idx
            choices_chains.append((chain_start, chain_stop))
        idx += 1

    for choices_chain in choices_chains:
        constraints = set()

        chain_start = choices_chain[0]
        chain_stop = choices_chain[1]
        chain_length = chain_stop - chain_start

        # Add one to have also have chain_length in the range
        for sub_chain_length in range(2, chain_length + 1):
            for start_idx in range(chain_start, chain_stop - sub_chain_length + 1):
                indices = range(start_idx, start_idx + sub_chain_length)
                node_names = [pipeline[idx][0] for idx in indices]

                num_node_choices = []
                node_choice_names = []
                skip_array_shape = []

                for idx in indices:
                    node = all_nodes[idx]
                    available_components = node.get_available_components(
                        dataset_properties,
                        include=node_i_choices_names[idx])
                    assert len(available_components) > 0, len(available_components)
                    skip_array_shape.append(len(available_components))
                    num_node_choices.append(range(len(available_components)))
                    node_choice_names.append([name for name in available_components])

                # Figure out which choices were already abandoned
                # (any single choice whose whole slice of `matches` is zero).
                skip_array = np.zeros(skip_array_shape)
                for product in itertools.product(*num_node_choices):
                    for node_idx, choice_idx in enumerate(product):
                        node_idx += start_idx
                        # Slice `matches` down to this one choice on axis
                        # node_idx, keeping all other axes fully open.
                        slices_ = tuple(
                            slice(None) if idx != node_idx else
                            slice(choice_idx, choice_idx + 1)
                            for idx in range(len(matches.shape)))

                        if np.sum(matches[slices_]) == 0:
                            skip_array[product] = 1

                for product in itertools.product(*num_node_choices):
                    if skip_array[product]:
                        continue

                    # Restrict every axis in `indices` to the chosen
                    # component; leave the remaining axes open.
                    slices = tuple(
                        slice(None) if idx not in indices else
                        slice(product[idx - start_idx],
                              product[idx - start_idx] + 1)
                        for idx in range(len(matches.shape)))

                    # This prints the affected nodes
                    # print [node_choice_names[i][product[i]]
                    #        for i in range(len(product))], \
                    #     np.sum(matches[slices])

                    if np.sum(matches[slices]) == 0:
                        constraint = tuple([(node_names[i],
                                             node_choice_names[i][product[i]])
                                            for i in range(len(product))])

                        # Check if a more general constraint/forbidden clause
                        # was already added
                        continue_ = False
                        for constraint_length in range(2, len(constraint)):
                            for constraint_start_idx in range(
                                    len(constraint) - constraint_length + 1):
                                sub_constraint = constraint[
                                    constraint_start_idx:constraint_start_idx + constraint_length]
                                if sub_constraint in constraints:
                                    continue_ = True
                                    break
                            if continue_:
                                break
                        if continue_:
                            continue

                        constraints.add(constraint)
                        forbiddens = []
                        for i in range(len(product)):
                            forbiddens.append(
                                ForbiddenEqualsClause(conf_space.get_hyperparameter(
                                    node_names[i] + ":__choice__"),
                                    node_choice_names[i][product[i]]))
                        forbidden = ForbiddenAndConjunction(*forbiddens)
                        conf_space.add_forbidden_clause(forbidden)

    return conf_space
from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter, \
    UniformIntegerHyperparameter
from ConfigSpace.forbidden import ForbiddenInClause, \
    ForbiddenAndConjunction, ForbiddenEqualsClause

from automl.utl import json_utils

# Hyperparameter search space for sklearn's FeatureAgglomeration,
# serialized to JSON for later use.
cs = ConfigurationSpace()

n_clusters = UniformIntegerHyperparameter("n_clusters", 2, 400, 2)
affinity = CategoricalHyperparameter(
    "affinity", ["euclidean", "manhattan", "cosine", "l1", "l2"], "euclidean")
linkage = CategoricalHyperparameter(
    "linkage", ["ward", "complete", "average", "single"], "ward")

cs.add_hyperparameters([n_clusters, affinity, linkage])

# Ward linkage only works with the euclidean affinity, so forbid every
# non-euclidean affinity in combination with linkage == "ward".
# BUGFIX: the previous version AND-ed four ForbiddenEqualsClauses on the
# same hyperparameter ("affinity" cannot equal several values at once), so
# the conjunction was unsatisfiable and the constraint never triggered.
# ForbiddenInClause (one value OUT OF a set) expresses the intended rule.
affinity_and_linkage = ForbiddenAndConjunction(
    ForbiddenInClause(affinity, ["manhattan", "cosine", "l1", "l2"]),
    ForbiddenEqualsClause(linkage, "ward"))

cs.add_forbidden_clause(affinity_and_linkage)

json_utils.write_cs_to_json_file(cs, "FeatureAgglomeration")
def _construct_forbidden_and(
    clause: Dict,
    cs: ConfigurationSpace,
) -> ForbiddenAndConjunction:
    """Deserialize an AND-conjunction of forbidden clauses.

    Recursively rebuilds every entry of ``clause['clauses']`` via
    ``_construct_forbidden`` and joins them into one conjunction.
    """
    members = []
    for sub_clause in clause['clauses']:
        members.append(_construct_forbidden(sub_clause, cs))
    return ForbiddenAndConjunction(*members)
def write(configuration_space):
    """Serialize a ConfigurationSpace to the IRACE parameter-file format.

    Returns the parameter definitions (with conditions appended to the
    matching parameter lines) as a single string. Forbidden clauses are not
    part of the returned string; when present they are written to a file
    called ``forbidden.txt`` in the current working directory as a side
    effect.

    Raises
    ------
    TypeError
        If ``configuration_space`` is not a ConfigurationSpace.
    ValueError
        If a hyperparameter name is not a legal IRACE identifier, or a
        forbidden clause cannot be expressed in IRACE.
    """
    if not isinstance(configuration_space, ConfigurationSpace):
        raise TypeError("irace.write expects an instance of %s, "
                        "you provided '%s'" % (ConfigurationSpace,
                                               type(configuration_space)))

    param_lines = io.StringIO()
    condition_lines = io.StringIO()
    forbidden_lines = []
    for hyperparameter in configuration_space.get_hyperparameters():
        # Check if the hyperparameter names are valid IRACE names!
        try:
            pp_param_name.parseString(hyperparameter.name)
        except pyparsing.ParseException:
            raise ValueError("Illegal hyperparameter name for IRACE: %s"
                             % hyperparameter.name)

        # First build params
        if param_lines.tell() > 0:
            param_lines.write("\n")
        if isinstance(hyperparameter, NumericalHyperparameter):
            # print "building countinuous param"
            param_lines.write(build_continuous(hyperparameter))
        elif isinstance(hyperparameter, CategoricalHyperparameter):
            # print "building categorical param"
            param_lines.write(build_categorical(hyperparameter))
        elif isinstance(hyperparameter, Constant):
            # print "building constant param"
            param_lines.write(build_constant(hyperparameter))
        elif isinstance(hyperparameter, OrdinalHyperparameter):
            # print "building constant param"
            param_lines.write(build_ordinal(hyperparameter))
        else:
            raise TypeError("Unknown type: %s (%s)" % (
                type(hyperparameter), hyperparameter))

    for condition in configuration_space.get_conditions():
        if condition_lines.tell() > 0:
            condition_lines.write("\n")
        condition_lines.write(build_condition(condition))

    for forbidden_clause in configuration_space.get_forbiddens():
        # Convert in-statement into two or more equals statements
        dlcs = forbidden_clause.get_descendant_literal_clauses()
        # First, get all in statements and convert them to equal statements
        in_statements = []
        other_statements = []
        for dlc in dlcs:
            if isinstance(dlc, MultipleValueForbiddenClause):
                if not isinstance(dlc, ForbiddenInClause):
                    raise ValueError("IRACE cannot handle this forbidden "
                                     "clause: %s" % dlc)
                # Expand the IN-clause into one equals clause per value.
                in_statements.append([
                    ForbiddenEqualsClause(dlc.hyperparameter, value)
                    for value in dlc.values
                ])
            else:
                other_statements.append(dlc)
        # Second, create the product of all elements in the IN statements,
        # create a ForbiddenAnd and add all ForbiddenEquals
        if len(in_statements) > 0:
            for i, p in enumerate(product(*in_statements)):
                all_forbidden_clauses = list(p) + other_statements
                f = ForbiddenAndConjunction(*all_forbidden_clauses)
                forbidden_lines.append(build_forbidden(f))
        else:
            forbidden_lines.append(build_forbidden(forbidden_clause))

    # Add conditions: first convert param_lines to array then search first
    # part of condition in that array; if found append second part of
    # condition to that array part
    splitted_params = param_lines.getvalue().split("\n")
    if condition_lines.tell() > 0:
        condition_lines.seek(0)
        param_lines.write("\n\n")
        for line in condition_lines:
            param_lines.write(line)
            # Find the parameter line this condition belongs to and append
            # the condition text (everything after the parameter name).
            t = filter(lambda x: line.split(" ")[0] in x, splitted_params)
            index = splitted_params.index(next(t))
            splitted_params[index] = splitted_params[index] + " ".join(
                line.split(" ")[1:])

    # Make sure every emitted line is newline-terminated.
    for i, j in enumerate(splitted_params):
        if j[-1] != "\n":
            splitted_params[i] += "\n"

    forbidden_lines_write = io.StringIO()
    if len(forbidden_lines) > 0:
        for forbidden in forbidden_lines:
            forbidden_lines_write.write(forbidden + '\n')
        # Side effect: forbidden clauses go into a separate IRACE file.
        output_fh = open('forbidden.txt', 'w')
        output_fh.write(forbidden_lines_write.getvalue())
        output_fh.close()

    # overwrite param_lines with split_params which contains lines with
    # conditions
    param_lines = io.StringIO()
    for l in splitted_params:
        param_lines.write(l)

    return param_lines.getvalue()
def _get_hyperparameter_search_space(self, include=None, exclude=None,
                                     dataset_properties=None):
    """Create the hyperparameter configuration space for classification.

    Builds the base search space from ``self.steps`` and then adds
    forbidden clauses for combinations known to be invalid or too
    expensive (sparse-capable classifiers behind the densifier,
    non-linear models with feature-learning preprocessors, multinomial NB
    with preprocessors that can produce negative values).

    Parameters
    ----------
    include : dict (optional, default=None)
        Components to restrict each pipeline step to.
    exclude : dict (optional, default=None)
        Components to remove from each pipeline step.
    dataset_properties : dict (optional, default=None)
        Dataset characteristics; ``target_type`` is forced to
        'classification'.

    Returns
    -------
    ConfigurationSpace
        Also stored on ``self.configuration_space_``.
    """
    cs = ConfigurationSpace()

    if dataset_properties is None or not isinstance(
            dataset_properties, dict):
        dataset_properties = dict()
    if not 'target_type' in dataset_properties:
        dataset_properties['target_type'] = 'classification'
    if dataset_properties['target_type'] != 'classification':
        dataset_properties['target_type'] = 'classification'

    pipeline = self.steps
    cs = self._get_base_search_space(cs=cs,
                                     dataset_properties=dataset_properties,
                                     exclude=exclude, include=include,
                                     pipeline=pipeline)

    classifiers = cs.get_hyperparameter('classifier:__choice__').choices
    preprocessors = cs.get_hyperparameter(
        'preprocessor:__choice__').choices
    available_classifiers = pipeline[-1][1].get_available_components(
        dataset_properties)
    available_preprocessors = pipeline[-2][1].get_available_components(
        dataset_properties)

    # Fallback defaults used when a forbidden clause would exclude the
    # current default configuration.
    possible_default_classifier = copy.copy(
        list(available_classifiers.keys()))
    default = cs.get_hyperparameter('classifier:__choice__').default
    del possible_default_classifier[possible_default_classifier.index(
        default)]

    # A classifier which can handle sparse data after the densifier is
    # forbidden for memory issues
    for key in classifiers:
        if SPARSE in available_classifiers[key].get_properties()['input']:
            if 'densifier' in preprocessors:
                # Retry loop: adding a clause raises ValueError when it
                # forbids the current default, so pick a new default.
                while True:
                    try:
                        cs.add_forbidden_clause(
                            ForbiddenAndConjunction(
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(
                                        'classifier:__choice__'), key),
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(
                                        'preprocessor:__choice__'),
                                    'densifier')))
                        # Success
                        break
                    except ValueError:
                        # Change the default and try again
                        try:
                            default = possible_default_classifier.pop()
                        except IndexError:
                            raise ValueError(
                                "Cannot find a legal default configuration.")
                        cs.get_hyperparameter(
                            'classifier:__choice__').default = default

    # which would take too long
    # Combinations of non-linear models with feature learning:
    classifiers_ = [
        "adaboost", "decision_tree", "extra_trees", "gradient_boosting",
        "k_nearest_neighbors", "libsvm_svc", "random_forest",
        "gaussian_nb", "decision_tree", "xgradient_boosting"
    ]
    feature_learning = ["kitchen_sinks", "nystroem_sampler"]

    for c, f in product(classifiers_, feature_learning):
        if c not in classifiers:
            continue
        if f not in preprocessors:
            continue
        while True:
            try:
                cs.add_forbidden_clause(
                    ForbiddenAndConjunction(
                        ForbiddenEqualsClause(
                            cs.get_hyperparameter("classifier:__choice__"),
                            c),
                        ForbiddenEqualsClause(
                            cs.get_hyperparameter(
                                "preprocessor:__choice__"), f)))
                break
            except KeyError:
                break
            except ValueError as e:
                # Change the default and try again
                try:
                    default = possible_default_classifier.pop()
                except IndexError:
                    raise ValueError(
                        "Cannot find a legal default configuration.")
                cs.get_hyperparameter(
                    'classifier:__choice__').default = default

    # Won't work
    # Multinomial NB etc don't use with features learning, pca etc
    classifiers_ = ["multinomial_nb"]
    preproc_with_negative_X = [
        "kitchen_sinks", "pca", "truncatedSVD", "fast_ica", "kernel_pca",
        "nystroem_sampler"
    ]
    for c, f in product(classifiers_, preproc_with_negative_X):
        if c not in classifiers:
            continue
        if f not in preprocessors:
            continue
        while True:
            try:
                cs.add_forbidden_clause(
                    ForbiddenAndConjunction(
                        ForbiddenEqualsClause(
                            cs.get_hyperparameter(
                                "preprocessor:__choice__"), f),
                        ForbiddenEqualsClause(
                            cs.get_hyperparameter("classifier:__choice__"),
                            c)))
                break
            except KeyError:
                break
            except ValueError:
                # Change the default and try again
                try:
                    default = possible_default_classifier.pop()
                except IndexError:
                    raise ValueError(
                        "Cannot find a legal default configuration.")
                cs.get_hyperparameter(
                    'classifier:__choice__').default = default

    self.configuration_space_ = cs
    self.dataset_properties_ = dataset_properties
    return cs
def read(pcs_string, debug=False):
    """Parse a ConfigurationSpace from lines in the (old) pcs format.

    Parameters
    ----------
    pcs_string : iterable of str
        Lines of a pcs definition (e.g. an open file handle).
    debug : bool
        Unused here; kept for interface compatibility.

    Returns
    -------
    ConfigurationSpace
        Space with hyperparameters, forbidden clauses and conditions.

    Raises
    ------
    NotImplementedError
        If a line or condition cannot be parsed, or a forbidden clause
        uses an operator other than '='.
    """
    configuration_space = ConfigurationSpace()
    conditions = []
    forbidden = []

    # some statistics
    ct = 0
    cont_ct = 0
    cat_ct = 0
    line_ct = 0

    for line in pcs_string:
        line_ct += 1

        if "#" in line:
            # It contains a comment
            pos = line.find("#")
            line = line[:pos]

        # Remove quotes and whitespaces at beginning and end
        line = line.replace('"', "").replace("'", "")
        line = line.strip()

        if "|" in line:
            # It's a condition
            try:
                c = pp_condition.parseString(line)
                conditions.append(c)
            except pyparsing.ParseException:
                raise NotImplementedError("Could not parse condition: %s"
                                          % line)
            continue
        # Any real parameter line contains "}" or "]".
        if "}" not in line and "]" not in line:
            continue
        if line.startswith("{") and line.endswith("}"):
            forbidden.append(line)
            continue
        if len(line.strip()) == 0:
            continue

        ct += 1
        param = None

        create = {"int": UniformIntegerHyperparameter,
                  "float": UniformFloatHyperparameter,
                  "categorical": CategoricalHyperparameter}

        # Try to parse as a numerical parameter first; on failure fall
        # through to the categorical parser.
        try:
            param_list = pp_cont_param.parseString(line)
            il = param_list[9:]
            if len(il) > 0:
                il = il[0]
            param_list = param_list[:9]
            name = param_list[0]
            lower = float(param_list[2])
            upper = float(param_list[4])
            paramtype = "int" if "i" in il else "float"
            log = True if "l" in il else False
            default = float(param_list[7])
            param = create[paramtype](name=name, lower=lower, upper=upper,
                                      q=None, log=log, default=default)
            cont_ct += 1
        except pyparsing.ParseException:
            pass

        try:
            param_list = pp_cat_param.parseString(line)
            name = param_list[0]
            # Every second token between the braces is a choice value.
            choices = [c for c in param_list[2:-4:2]]
            default = param_list[-2]
            param = create["categorical"](name=name, choices=choices,
                                          default=default)
            cat_ct += 1
        except pyparsing.ParseException:
            pass

        if param is None:
            raise NotImplementedError("Could not parse: %s" % line)

        configuration_space.add_hyperparameter(param)

    for clause in forbidden:
        # TODO test this properly!
        # TODO Add a try/catch here!
        # noinspection PyUnusedLocal
        param_list = pp_forbidden_clause.parseString(clause)
        tmp_list = []
        clause_list = []
        # Tokens come in (name, operator, value) triples; the separator
        # token between triples is consumed by the else branch below.
        for value in param_list[1:]:
            if len(tmp_list) < 3:
                tmp_list.append(value)
            else:
                # So far, only equals is supported by SMAC
                if tmp_list[1] == '=':
                    # TODO maybe add a check if the hyperparameter is
                    # actually in the configuration space
                    clause_list.append(
                        ForbiddenEqualsClause(
                            configuration_space.get_hyperparameter(
                                tmp_list[0]), tmp_list[2]))
                else:
                    raise NotImplementedError()
                tmp_list = []
        configuration_space.add_forbidden_clause(
            ForbiddenAndConjunction(*clause_list))

    # Now handle conditions
    # If there are two conditions for one child, these two conditions are an
    # AND-conjunction of conditions, thus we have to connect them
    conditions_per_child = OrderedDict()
    for condition in conditions:
        child_name = condition[0]
        if child_name not in conditions_per_child:
            conditions_per_child[child_name] = list()
        conditions_per_child[child_name].append(condition)

    for child_name in conditions_per_child:
        condition_objects = []
        for condition in conditions_per_child[child_name]:
            child = configuration_space.get_hyperparameter(child_name)
            parent_name = condition[2]
            parent = configuration_space.get_hyperparameter(parent_name)
            restrictions = condition[5:-1:2]

            # TODO: cast the type of the restriction!
            if len(restrictions) == 1:
                condition = EqualsCondition(child, parent, restrictions[0])
            else:
                condition = InCondition(child, parent, values=restrictions)
            condition_objects.append(condition)

        # Now we have all condition objects for this child, so we can build a
        # giant AND-conjunction of them (if number of conditions >= 2)!
        if len(condition_objects) > 1:
            and_conjunction = AndConjunction(*condition_objects)
            configuration_space.add_condition(and_conjunction)
        else:
            configuration_space.add_condition(condition_objects[0])

    return configuration_space
def read(pcs_string, debug=False):
    """
    Read in a
    :py:class:`~ConfigSpace.configuration_space.ConfigurationSpace`
    definition from a pcs file.

    Example
    -------

    .. testsetup:: pcs_new_test

        from ConfigSpace import ConfigurationSpace
        import ConfigSpace.hyperparameters as CSH
        from ConfigSpace.read_and_write import pcs_new
        cs = ConfigurationSpace()
        cs.add_hyperparameter(CSH.CategoricalHyperparameter('a', choices=[1, 2, 3]))
        with open('configspace.pcs_new', 'w') as f:
             f.write(pcs_new.write(cs))

    .. doctest:: pcs_new_test

        >>> from ConfigSpace.read_and_write import pcs_new
        >>> with open('configspace.pcs_new', 'r') as fh:
        ...     deserialized_conf = pcs_new.read(fh)

    Parameters
    ----------
    pcs_string : str
        ConfigSpace definition in pcs format
    debug : bool
        Provides debug information. Defaults to False.

    Returns
    -------
    :py:class:`~ConfigSpace.configuration_space.ConfigurationSpace`
        The deserialized ConfigurationSpace object
    """
    configuration_space = ConfigurationSpace()
    conditions = []
    forbidden = []

    # some statistics
    ct = 0
    cont_ct = 0
    cat_ct = 0
    ord_ct = 0
    line_ct = 0

    for line in pcs_string:
        line_ct += 1

        if "#" in line:
            # It contains a comment
            pos = line.find("#")
            line = line[:pos]

        # Remove quotes and whitespaces at beginning and end
        line = line.replace('"', "").replace("'", "")
        line = line.strip()
        if "|" in line:
            # It's a condition
            try:
                c = pp_condition.parseString(line)
                conditions.append(c)
            except pyparsing.ParseException:
                raise NotImplementedError("Could not parse condition: %s"
                                          % line)
            continue
        if "}" not in line and "]" not in line:
            continue
        if line.startswith("{") and line.endswith("}"):
            forbidden.append(line)
            continue
        if len(line.strip()) == 0:
            continue

        ct += 1
        param = None

        create = {"int": UniformIntegerHyperparameter,
                  "float": UniformFloatHyperparameter,
                  "categorical": CategoricalHyperparameter,
                  "ordinal": OrdinalHyperparameter}

        # Try the numerical grammar first; fall through to the
        # categorical/ordinal grammar on parse failure.
        try:
            param_list = pp_cont_param.parseString(line)
            name = param_list[0]
            if param_list[1] == 'integer':
                paramtype = 'int'
            elif param_list[1] == 'real':
                paramtype = 'float'
            else:
                paramtype = None

            if paramtype in ['int', 'float']:
                log = param_list[10:]
                param_list = param_list[:10]
                if len(log) > 0:
                    log = log[0]
                lower = float(param_list[3])
                upper = float(param_list[5])
                log_on = True if "log" in log else False
                default_value = float(param_list[8])
                param = create[paramtype](name=name,
                                          lower=lower,
                                          upper=upper,
                                          q=None,
                                          log=log_on,
                                          default_value=default_value)
                cont_ct += 1
        except pyparsing.ParseException:
            pass

        try:
            if "categorical" in line:
                param_list = pp_cat_param.parseString(line)
                name = param_list[0]
                choices = [choice for choice in param_list[3:-4:2]]
                default_value = param_list[-2]
                param = create["categorical"](
                    name=name,
                    choices=choices,
                    default_value=default_value,
                )
                cat_ct += 1
            elif "ordinal" in line:
                param_list = pp_ord_param.parseString(line)
                name = param_list[0]
                sequence = [seq for seq in param_list[3:-4:2]]
                default_value = param_list[-2]
                param = create["ordinal"](
                    name=name,
                    sequence=sequence,
                    default_value=default_value,
                )
                ord_ct += 1
        except pyparsing.ParseException:
            pass

        if param is None:
            raise NotImplementedError("Could not parse: %s" % line)

        configuration_space.add_hyperparameter(param)

    for clause in forbidden:
        param_list = pp_forbidden_clause.parseString(clause)
        tmp_list = []
        clause_list = []
        # Tokens come in (name, operator, value) triples; the separator
        # between triples is consumed by the else branch.
        for value in param_list[1:]:
            if len(tmp_list) < 3:
                tmp_list.append(value)
            else:
                # So far, only equals is supported by SMAC
                if tmp_list[1] == '=':
                    hp = configuration_space.get_hyperparameter(tmp_list[0])
                    if isinstance(hp, NumericalHyperparameter):
                        # Cast the raw token to the hyperparameter's type
                        # and validate it against the bounds.
                        if isinstance(hp, IntegerHyperparameter):
                            forbidden_value = int(tmp_list[2])
                        elif isinstance(hp, FloatHyperparameter):
                            forbidden_value = float(tmp_list[2])
                        else:
                            raise NotImplementedError
                        if forbidden_value < hp.lower or forbidden_value > hp.upper:
                            raise ValueError(
                                f'forbidden_value is set out of the bound, it needs to'
                                f' be set between [{hp.lower}, {hp.upper}]'
                                f' but its value is {forbidden_value}')
                    elif isinstance(
                            hp,
                            (CategoricalHyperparameter, OrdinalHyperparameter)):
                        hp_values = hp.choices if isinstance(hp, CategoricalHyperparameter) \
                            else hp.sequence
                        forbidden_value_in_hp_values = tmp_list[2] in hp_values
                        if forbidden_value_in_hp_values:
                            forbidden_value = tmp_list[2]
                        else:
                            # BUGFIX: interpolate the offending raw token
                            # (tmp_list[2]); `forbidden_value` is unbound
                            # (or stale) on this error path.
                            raise ValueError(
                                f'forbidden_value is set out of the allowed value '
                                f'sets, it needs to be one member from {hp_values} '
                                f'but its value is {tmp_list[2]}')
                    else:
                        raise ValueError('Unsupported Hyperparameter sorts')

                    clause_list.append(
                        ForbiddenEqualsClause(
                            configuration_space.get_hyperparameter(
                                tmp_list[0]), forbidden_value))
                else:
                    raise NotImplementedError()
                tmp_list = []
        configuration_space.add_forbidden_clause(
            ForbiddenAndConjunction(*clause_list))

    # Group conditions per child hyperparameter so multiple conditions on
    # the same child can be combined.
    conditions_per_child = OrderedDict()
    for condition in conditions:
        child_name = condition[0]
        if child_name not in conditions_per_child:
            conditions_per_child[child_name] = list()
        conditions_per_child[child_name].append(condition)

    for child_name in conditions_per_child:
        for condition in conditions_per_child[child_name]:
            condition = condition[2:]
            condition = ' '.join(condition)
            if '||' in str(condition):
                ors = []
                # 1st case we have a mixture of || and &&
                if '&&' in str(condition):
                    ors_combis = []
                    for cond_parts in str(condition).split('||'):
                        condition = str(cond_parts).split('&&')
                        # if length is 1 it must be or
                        if len(condition) == 1:
                            element_list = condition[0].split()
                            ors_combis.append(
                                condition_specification(
                                    child_name,
                                    element_list,
                                    configuration_space,
                                ))
                        else:
                            # now taking care of ands
                            ands = []
                            for and_part in condition:
                                # BUGFIX: split only this AND-part. The
                                # previous comprehension also iterated
                                # `condition`, duplicating the tokens
                                # len(condition) times.
                                element_list = [
                                    element for element in and_part.split()
                                ]
                                ands.append(
                                    condition_specification(
                                        child_name,
                                        element_list,
                                        configuration_space,
                                    ))
                            ors_combis.append(AndConjunction(*ands))
                    mixed_conjunction = OrConjunction(*ors_combis)
                    configuration_space.add_condition(mixed_conjunction)
                else:
                    # 2nd case: we only have ors
                    for cond_parts in str(condition).split('||'):
                        element_list = [element for element in
                                        cond_parts.split()]
                        ors.append(
                            condition_specification(
                                child_name,
                                element_list,
                                configuration_space,
                            ))
                    or_conjunction = OrConjunction(*ors)
                    configuration_space.add_condition(or_conjunction)
            else:
                # 3rd case: we only have ands
                if '&&' in str(condition):
                    ands = []
                    for cond_parts in str(condition).split('&&'):
                        element_list = [element for element in
                                        cond_parts.split()]
                        ands.append(
                            condition_specification(
                                child_name,
                                element_list,
                                configuration_space,
                            ))
                    and_conjunction = AndConjunction(*ands)
                    configuration_space.add_condition(and_conjunction)
                else:
                    # 4th case: we have a normal condition
                    element_list = [element for element in condition.split()]
                    normal_condition = condition_specification(
                        child_name,
                        element_list,
                        configuration_space,
                    )
                    configuration_space.add_condition(normal_condition)

    return configuration_space
def _get_hyperparameter_search_space(
        self,
        dataset_properties: Dict[str, BaseDatasetPropertiesType],
        include: Optional[Dict[str, Any]] = None,
        exclude: Optional[Dict[str, Any]] = None,
) -> ConfigurationSpace:
    """Create the hyperparameter configuration space.

    For the given steps, and the Choices within that steps,
    this procedure returns a configuration space object to explore.

    Args:
        include (Optional[Dict[str, Any]]): What hyper-parameter configurations
            to honor when creating the configuration space
        exclude (Optional[Dict[str, Any]]): What hyper-parameter configurations
            to remove from the configuration space
        dataset_properties (Optional[Dict[str, BaseDatasetPropertiesType]]):
            Characteristics of the dataset to guide the pipeline choices
            of components

    Returns:
        cs (ConfigurationSpace): The configuration space describing
            the TabularClassificationPipeline.
    """
    cs = ConfigurationSpace()

    if not isinstance(dataset_properties, dict):
        warnings.warn('The given dataset_properties argument contains an illegal value.'
                      'Proceeding with the default value')
        dataset_properties = dict()

    if 'target_type' not in dataset_properties:
        dataset_properties['target_type'] = 'tabular_classification'
    if dataset_properties['target_type'] != 'tabular_classification':
        warnings.warn('Tabular classification is being used, however the target_type'
                      'is not given as "tabular_classification". Overriding it.')
        dataset_properties['target_type'] = 'tabular_classification'
    # get the base search space given this
    # dataset properties. Then overwrite with custom
    # classification requirements
    cs = self._get_base_search_space(
        cs=cs, dataset_properties=dataset_properties,
        exclude=exclude, include=include, pipeline=self.steps)

    # Here we add custom code, that is used to ensure valid configurations, For example
    # Learned Entity Embedding is only valid when encoder is one hot encoder
    if 'network_embedding' in self.named_steps.keys() and 'encoder' in self.named_steps.keys():
        embeddings = cs.get_hyperparameter('network_embedding:__choice__').choices
        if 'LearnedEntityEmbedding' in embeddings:
            encoders = cs.get_hyperparameter('encoder:__choice__').choices
            # Fallback defaults in case a forbidden clause hits the
            # current default embedding.
            possible_default_embeddings = copy.copy(list(embeddings))
            del possible_default_embeddings[possible_default_embeddings.index('LearnedEntityEmbedding')]

            for encoder in encoders:
                if encoder == 'OneHotEncoder':
                    continue
                # Retry loop: add_forbidden_clause raises ValueError when
                # the clause forbids the default; switch defaults and retry.
                while True:
                    try:
                        cs.add_forbidden_clause(ForbiddenAndConjunction(
                            ForbiddenEqualsClause(cs.get_hyperparameter(
                                'network_embedding:__choice__'), 'LearnedEntityEmbedding'),
                            ForbiddenEqualsClause(cs.get_hyperparameter(
                                'encoder:__choice__'), encoder)
                        ))
                        break
                    except ValueError:
                        # change the default and try again
                        try:
                            default = possible_default_embeddings.pop()
                        except IndexError:
                            raise ValueError("Cannot find a legal default configuration")
                        cs.get_hyperparameter(
                            'network_embedding:__choice__').default_value = default
    self.configuration_space = cs
    self.dataset_properties = dataset_properties
    return cs
def get_hyperspace(data_info,
                   include_estimators=None, include_preprocessors=None):
    """Build the full hyperparameter search space for a task.

    Constructs the pipeline for ``data_info['task']``, adds each node's
    sub-space, and then adds forbidden clauses for illegal component
    combinations (based on the ``matches`` array, sparse data behind the
    densifier, expensive model/feature-learning pairs, and classifier/
    preprocessor pairs that cannot handle negative inputs).

    Parameters
    ----------
    data_info : dict
        Dataset metadata; must contain 'task'. Missing 'is_sparse' /
        'signed' entries are defaulted to False.
    include_estimators : list or None
        Restrict the estimator choice to these components.
    include_preprocessors : list or None
        Restrict the preprocessor choice to these components.

    Returns
    -------
    ConfigurationSpace
    """
    if data_info is None or not isinstance(data_info, dict):
        data_info = dict()

    if 'is_sparse' not in data_info:
        # This dataset is probaby dense
        data_info['is_sparse'] = False

    sparse = data_info['is_sparse']
    task_type = data_info['task']

    multilabel = (task_type == MULTILABEL_CLASSIFICATION)
    multiclass = (task_type == MULTICLASS_CLASSIFICATION)

    if task_type in CLASSIFICATION_TASKS:
        data_info['multilabel'] = multilabel
        data_info['multiclass'] = multiclass
        data_info['target_type'] = 'classification'
        pipe_type = 'classifier'

        # Components match to be forbidden
        components_ = ["adaboost", "decision_tree", "extra_trees",
                       "gradient_boosting", "k_nearest_neighbors",
                       "libsvm_svc", "random_forest", "gaussian_nb",
                       "decision_tree"]
        feature_learning_ = ["kitchen_sinks", "nystroem_sampler"]

    elif task_type in REGRESSION_TASKS:
        data_info['target_type'] = 'regression'
        pipe_type = 'regressor'

        # Components match to be forbidden
        components_ = ["adaboost", "decision_tree", "extra_trees",
                       "gaussian_process", "gradient_boosting",
                       "k_nearest_neighbors", "random_forest"]
        feature_learning_ = ["kitchen_sinks", "kernel_pca",
                             "nystroem_sampler"]

    else:
        raise NotImplementedError()

    include, exclude = dict(), dict()
    if include_preprocessors is not None:
        include["preprocessor"] = include_preprocessors
    if include_estimators is not None:
        include[pipe_type] = include_estimators

    cs = ConfigurationSpace()

    # Construct pipeline
    # FIXME OrderedDIct?
    pipeline = get_pipeline(data_info['task'])

    # TODO include, exclude, pipeline
    keys = [pair[0] for pair in pipeline]
    for key in include:
        if key not in keys:
            raise ValueError('Invalid key in include: %s; should be one '
                             'of %s' % (key, keys))
    for key in exclude:
        if key not in keys:
            raise ValueError('Invalid key in exclude: %s; should be one '
                             'of %s' % (key, keys))

    # Construct hyperspace
    # TODO What's the 'signed' stands for?
    if 'signed' not in data_info:
        # This dataset probably contains unsigned data
        data_info['signed'] = False

    match = check_pipeline(pipeline, data_info,
                           include=include, exclude=exclude)

    # Now we have only legal combinations at this step of the pipeline
    # Simple sanity checks
    assert np.sum(match) != 0, "No valid pipeline found."
    assert np.sum(match) <= np.size(match), \
        "'matches' is not binary; %s <= %d, %s" % \
        (str(np.sum(match)), np.size(match), str(match.shape))

    # Iterate each dimension of the matches array (each step of the
    # pipeline) to see if we can add a hyperparameter for that step
    for node_idx, n_ in enumerate(pipeline):
        node_name, node = n_
        is_choice = hasattr(node, "get_available_components")

        # if the node isn't a choice we can add it immediately because it
        # must be active (if it wouldn't, np.sum(matches) would be zero
        if not is_choice:
            cs.add_configuration_space(
                node_name,
                node.get_hyperparameter_search_space(data_info))
        # If the node isn't a choice, we have to figure out which of it's
        # choices are actually legal choices
        else:
            choices_list = find_active_choices(
                match, node, node_idx, data_info,
                include=include.get(node_name),
                exclude=exclude.get(node_name))
            cs.add_configuration_space(
                node_name,
                node.get_hyperparameter_search_space(
                    data_info, include=choices_list))

    # And now add forbidden parameter configurations
    # According to matches
    if np.sum(match) < np.size(match):
        cs = add_forbidden(conf_space=cs, pipeline=pipeline, matches=match,
                           dataset_properties=data_info,
                           include=include, exclude=exclude)

    components = cs.get_hyperparameter('%s:__choice__' % pipe_type).choices
    availables = pipeline[-1][1].get_available_components(data_info)

    preprocessors = cs.get_hyperparameter('preprocessor:__choice__').choices
    #available_preprocessors = pipeline[-2][1].get_available_components(data_info)

    # Fallback defaults used when a forbidden clause would exclude the
    # current default configuration.
    possible_default = copy.copy(list(availables.keys()))
    default = cs.get_hyperparameter('%s:__choice__' % pipe_type).default
    del possible_default[possible_default.index(default)]

    # A classifier which can handle sparse data after the densifier is
    # forbidden for memory issues
    for key in components:
        # TODO regression dataset_properties=None
        if SPARSE in availables[key].get_properties()['input']:
            if 'densifier' in preprocessors:
                # Retry loop: adding a clause raises ValueError when it
                # forbids the current default; switch defaults and retry.
                while True:
                    try:
                        cs.add_forbidden_clause(
                            ForbiddenAndConjunction(
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(
                                        '%s:__choice__' % pipe_type), key),
                                ForbiddenEqualsClause(
                                    cs.get_hyperparameter(
                                        'preprocessor:__choice__'),
                                    'densifier')
                            ))
                        # Success
                        break
                    except ValueError:
                        # Change the default and try again
                        try:
                            default = possible_default.pop()
                        except IndexError:
                            raise ValueError(
                                "Cannot find a legal default configuration.")
                        cs.get_hyperparameter(
                            '%s:__choice__' % pipe_type).default = default

    # which would take too long
    # Combinations of non-linear models with feature learning:
    for c, f in itertools.product(components_, feature_learning_):
        if c not in components:
            continue
        if f not in preprocessors:
            continue
        while True:
            try:
                cs.add_forbidden_clause(ForbiddenAndConjunction(
                    ForbiddenEqualsClause(cs.get_hyperparameter(
                        "%s:__choice__" % pipe_type), c),
                    ForbiddenEqualsClause(cs.get_hyperparameter(
                        "preprocessor:__choice__"), f)))
                break
            except KeyError:
                break
            except ValueError as e:
                # Change the default and try again
                try:
                    default = possible_default.pop()
                except IndexError:
                    raise ValueError(
                        "Cannot find a legal default configuration.")
                cs.get_hyperparameter(
                    '%s:__choice__' % pipe_type).default = default

    if task_type in CLASSIFICATION_TASKS:
        # Won't work
        # Multinomial NB etc don't use with features learning, pca etc
        components_ = ["multinomial_nb"]
        preproc_with_negative_X = ["kitchen_sinks", "pca", "truncatedSVD",
                                   "fast_ica", "kernel_pca",
                                   "nystroem_sampler"]
        for c, f in itertools.product(components_, preproc_with_negative_X):
            if c not in components:
                continue
            if f not in preprocessors:
                continue
            while True:
                try:
                    cs.add_forbidden_clause(ForbiddenAndConjunction(
                        ForbiddenEqualsClause(cs.get_hyperparameter(
                            "preprocessor:__choice__"), f),
                        ForbiddenEqualsClause(cs.get_hyperparameter(
                            "classifier:__choice__"), c)))
                    break
                except KeyError:
                    break
                except ValueError:
                    # Change the default and try again
                    try:
                        default = possible_default.pop()
                    except IndexError:
                        raise ValueError(
                            "Cannot find a legal default configuration.")
                    cs.get_hyperparameter(
                        'classifier:__choice__').default = default

    return cs