def get_config_space(num_layers=((1, 15), False),
                     num_units=((10, 1024), True),
                     activation=('sigmoid', 'tanh', 'relu'),
                     dropout=(0.0, 0.8),
                     use_dropout=(True, False),
                     **kwargs):
    """Build the ConfigSpace search space for the MLP network.

    Arguments follow the ((lower, upper), log) / choices convention consumed
    by ``get_hyperparameter``. Per-layer overrides may be supplied through
    ``kwargs`` (e.g. ``num_units_3=...``); leftover entries are rejected.
    """
    cs = CS.ConfigurationSpace()

    layers_hp = get_hyperparameter(CSH.UniformIntegerHyperparameter,
                                   'num_layers', num_layers)
    cs.add_hyperparameter(layers_hp)
    dropout_flag_hp = add_hyperparameter(cs, CS.CategoricalHyperparameter,
                                         "use_dropout", use_dropout)

    min_layers, max_layers = num_layers[0]
    for depth in range(1, max_layers + 1):
        units_hp = get_hyperparameter(
            CSH.UniformIntegerHyperparameter, "num_units_%d" % depth,
            kwargs.pop("num_units_%d" % depth, num_units))
        cs.add_hyperparameter(units_hp)
        # Units of layer `depth` only exist when at least `depth` layers
        # are sampled.
        if depth > min_layers:
            cs.add_condition(
                CS.GreaterThanCondition(units_hp, layers_hp, depth - 1))
        if True in use_dropout:
            layer_dropout_hp = get_hyperparameter(
                CSH.UniformFloatHyperparameter, "dropout_%d" % depth,
                kwargs.pop("dropout_%d" % depth, dropout))
            cs.add_hyperparameter(layer_dropout_hp)
            dropout_active = CS.EqualsCondition(layer_dropout_hp,
                                                dropout_flag_hp, True)
            if depth > min_layers:
                deep_enough = CS.GreaterThanCondition(layer_dropout_hp,
                                                      layers_hp, depth - 1)
                cs.add_condition(CS.AndConjunction(dropout_active, deep_enough))
            else:
                cs.add_condition(dropout_active)

    add_hyperparameter(cs, CSH.CategoricalHyperparameter, 'activation',
                       activation)

    assert len(kwargs) == 0, \
        "Invalid hyperparameter updates for mlpnet: %s" % str(kwargs)
    return cs
def get_config_space(user_updates=None):
    """Create the MLP configuration space.

    ``user_updates['num_layers']`` may override the default (1, 15) range for
    the number of layers; all other ranges are fixed.
    """
    layer_range = (1, 15)
    unit_range = (10, 1024)
    activations = ('sigmoid', 'tanh', 'relu')
    dropout_range = (0.0, 0.8)
    if user_updates is not None and 'num_layers' in user_updates:
        layer_range = user_updates['num_layers']

    cs = CS.ConfigurationSpace()
    num_layers = CSH.UniformIntegerHyperparameter(
        'num_layers', lower=layer_range[0], upper=layer_range[1])
    cs.add_hyperparameter(num_layers)
    use_dropout = cs.add_hyperparameter(
        CS.CategoricalHyperparameter("use_dropout", [True, False],
                                     default_value=True))

    for depth in range(1, layer_range[1] + 1):
        units = CSH.UniformIntegerHyperparameter(
            "num_units_%d" % depth, lower=unit_range[0],
            upper=unit_range[1], log=True)
        cs.add_hyperparameter(units)
        layer_dropout = CSH.UniformFloatHyperparameter(
            "dropout_%d" % depth, lower=dropout_range[0],
            upper=dropout_range[1])
        cs.add_hyperparameter(layer_dropout)
        dropout_active = CS.EqualsCondition(layer_dropout, use_dropout, True)
        if depth > layer_range[0]:
            # Layer `depth` only exists when enough layers are sampled.
            cs.add_condition(
                CS.GreaterThanCondition(units, num_layers, depth - 1))
            cs.add_condition(CS.AndConjunction(
                dropout_active,
                CS.GreaterThanCondition(layer_dropout, num_layers, depth - 1)))
        else:
            cs.add_condition(dropout_active)

    cs.add_hyperparameter(
        CSH.CategoricalHyperparameter('activation', activations))
    return cs
def get_configspace():
    """
    Build the configuration space with the needed hyperparameters.

    It is easily possible to implement different types of hyperparameters.
    Beside float hyperparameters on a log scale, it is also able to handle
    categorical input parameters.

    :return: ConfigurationSpace object
    """
    # BUG FIX: the docstring used to appear *after* these imports, which made
    # it a dead string expression instead of the function's docstring.
    import ConfigSpace as CS
    import ConfigSpace.hyperparameters as CSH

    cs = CS.ConfigurationSpace()

    # Preprocessing / training setup choices.
    scaler = CSH.CategoricalHyperparameter('scaler', [
        'None', 'StandardScaler', 'RobustScaler', 'MinMaxScaler',
        'MaxAbsScaler'
    ])
    init = CSH.CategoricalHyperparameter('init', [
        'uniform', 'normal', 'glorot_uniform', 'glorot_normal', 'he_uniform',
        'he_normal'
    ])
    batch_size = CSH.CategoricalHyperparameter('batch_size',
                                               [16, 32, 64, 128, 256])
    shuffle = CSH.CategoricalHyperparameter('shuffle', [True, False])
    loss = CSH.CategoricalHyperparameter(
        'loss', ['mean_absolute_error', 'mean_squared_error'])
    optimizer = CSH.CategoricalHyperparameter(
        'optimizer', ['rmsprop', 'adagrad', 'adadelta', 'adam', 'adamax'])
    cs.add_hyperparameters(
        [scaler, init, batch_size, shuffle, loss, optimizer])

    # Architecture: up to 5 layers, each with size/activation/extras/dropout.
    n_layers = CSH.UniformIntegerHyperparameter('n_layers', lower=1, upper=5,
                                                default_value=2)
    layer_sizes = [
        CSH.UniformIntegerHyperparameter('layer_{}_size'.format(l), lower=2,
                                         upper=100, default_value=16,
                                         log=True) for l in range(1, 6)
    ]
    layer_activations = [
        CSH.CategoricalHyperparameter('layer_{}_activation'.format(l),
                                      ['relu', 'sigmoid', 'tanh'])
        for l in range(1, 6)
    ]
    layer_extras = [
        CSH.CategoricalHyperparameter('layer_{}_extras'.format(l),
                                      ['None', 'dropout', 'batchnorm'])
        for l in range(1, 6)
    ]
    dropout_rates = [
        CSH.UniformFloatHyperparameter('dropout_rate_{}'.format(l), lower=0.1,
                                       upper=0.5, default_value=0.2,
                                       log=False) for l in range(1, 6)
    ]
    cs.add_hyperparameters([n_layers] + layer_sizes + layer_activations +
                           layer_extras + dropout_rates)

    # Hyperparameters of layer i (i >= 2) are only active when n_layers > i-1.
    # Built group-by-group to preserve the original condition ordering.
    conditions = [
        CS.GreaterThanCondition(group[n], n_layers, n)
        for group in (layer_sizes, layer_activations, layer_extras)
        for n in range(1, 5)
    ]
    equal_conditions = [
        CS.EqualsCondition(dropout_rates[n], layer_extras[n], 'dropout')
        for n in range(0, 5)
    ]
    greater_size_conditions = [
        CS.GreaterThanCondition(dropout_rates[n], n_layers, n)
        for n in range(1, 5)
    ]
    for c in conditions:
        cs.add_condition(c)
    # Layer-1 dropout only needs the 'extras == dropout' condition; deeper
    # layers additionally require the network to be deep enough.
    cs.add_condition(equal_conditions[0])
    for j in range(0, 4):
        cs.add_condition(CS.AndConjunction(greater_size_conditions[j],
                                           equal_conditions[j + 1]))
    return cs
def get_config_space(num_groups=((1, 9), False),
                     blocks_per_group=((1, 4), False),
                     num_units=((10, 1024), True),
                     activation=('sigmoid', 'tanh', 'relu'),
                     max_shake_drop_probability=(0, 1),
                     dropout=(0, 1.0),
                     use_shake_drop=(True, False),
                     use_shake_shake=(True, False),
                     use_dropout=(True, False),
                     **kwargs):
    """Build the ConfigSpace search space for the ResNet.

    Arguments follow the ((lower, upper), log) / choices convention consumed
    by ``get_hyperparameter``. Per-group overrides can be passed via
    ``kwargs`` (e.g. ``num_units_2=...``); leftover entries are rejected.
    """
    cs = ConfigSpace.ConfigurationSpace()

    groups_hp = get_hyperparameter(ConfigSpace.UniformIntegerHyperparameter,
                                   "num_groups", num_groups)
    cs.add_hyperparameter(groups_hp)
    blocks_hp = get_hyperparameter(ConfigSpace.UniformIntegerHyperparameter,
                                   "blocks_per_group", blocks_per_group)
    cs.add_hyperparameter(blocks_hp)
    add_hyperparameter(cs, ConfigSpace.CategoricalHyperparameter,
                       "activation", activation)
    dropout_flag_hp = get_hyperparameter(
        ConfigSpace.CategoricalHyperparameter, "use_dropout", use_dropout)
    cs.add_hyperparameter(dropout_flag_hp)
    add_hyperparameter(cs, ConfigSpace.CategoricalHyperparameter,
                       "use_shake_shake", use_shake_shake)
    shake_drop_flag_hp = add_hyperparameter(
        cs, ConfigSpace.CategoricalHyperparameter, "use_shake_drop",
        use_shake_drop)
    if True in use_shake_drop:
        shake_drop_prob_hp = add_hyperparameter(
            cs, ConfigSpace.UniformFloatHyperparameter,
            "max_shake_drop_probability", max_shake_drop_probability)
        cs.add_condition(ConfigSpace.EqualsCondition(
            shake_drop_prob_hp, shake_drop_flag_hp, True))

    # Iterate to the upper bound of num_groups; the actual count is sampled.
    for group in range(0, num_groups[0][1] + 1):
        units_hp = add_hyperparameter(
            cs, ConfigSpace.UniformIntegerHyperparameter,
            "num_units_%d" % group,
            kwargs.pop("num_units_%d" % group, num_units))
        if group > 1:
            cs.add_condition(ConfigSpace.GreaterThanCondition(
                units_hp, groups_hp, group - 1))
        if True in use_dropout:
            group_dropout_hp = add_hyperparameter(
                cs, ConfigSpace.UniformFloatHyperparameter,
                "dropout_%d" % group,
                kwargs.pop("dropout_%d" % group, dropout))
            dropout_active = ConfigSpace.EqualsCondition(
                group_dropout_hp, dropout_flag_hp, True)
            if group > 1:
                cs.add_condition(ConfigSpace.AndConjunction(
                    dropout_active,
                    ConfigSpace.GreaterThanCondition(group_dropout_hp,
                                                     groups_hp, group - 1)))
            else:
                cs.add_condition(dropout_active)

    assert len(kwargs) == 0, \
        "Invalid hyperparameter updates for resnet: %s" % str(kwargs)
    return cs
def get_config_space(growth_rate_range=(5, 128),
                     nr_blocks=(1, 5),
                     kernel_range=(2, 7),
                     layer_range=(5, 50),
                     activations=all_activations.keys(),
                     conv_init=('random', 'kaiming_normal', 'constant_0',
                                'constant_1', 'constant_05'),
                     batchnorm_weight_init=('random', 'constant_0',
                                            'constant_1', 'constant_05'),
                     batchnorm_bias_init=('random', 'constant_0',
                                          'constant_1', 'constant_05'),
                     linear_bias_init=('random', 'constant_0', 'constant_1',
                                       'constant_05'),
                     **kwargs):
    """Build the DenseNet configuration space.

    Ranges are (lower, upper) tuples consumed by ``get_hyperparameter``;
    ``nr_blocks`` may also use the ((min, max), log) form. ``activations``
    is filtered against the module-level ``all_activations`` registry.
    """
    import ConfigSpace as CS
    import ConfigSpace.hyperparameters as CSH
    from autoPyTorch.utils.config_space_hyperparameter import add_hyperparameter
    cs = CS.ConfigurationSpace()

    # Hoisted out of the per-block loop below: the allowed activation set is
    # loop-invariant and was recomputed on every iteration.
    allowed_activations = sorted(set(activations).intersection(all_activations))

    growth_rate_hp = get_hyperparameter(
        ConfigSpace.UniformIntegerHyperparameter, 'growth_rate',
        growth_rate_range)
    first_conv_kernel_hp = get_hyperparameter(
        ConfigSpace.UniformIntegerHyperparameter, 'first_conv_kernel',
        kernel_range)
    first_pool_kernel_hp = get_hyperparameter(
        ConfigSpace.UniformIntegerHyperparameter, 'first_pool_kernel',
        kernel_range)
    conv_init_hp = get_hyperparameter(
        ConfigSpace.CategoricalHyperparameter, 'conv_init', conv_init)
    batchnorm_weight_init_hp = get_hyperparameter(
        ConfigSpace.CategoricalHyperparameter, 'batchnorm_weight_init',
        batchnorm_weight_init)
    batchnorm_bias_init_hp = get_hyperparameter(
        ConfigSpace.CategoricalHyperparameter, 'batchnorm_bias_init',
        batchnorm_bias_init)
    linear_bias_init_hp = get_hyperparameter(
        ConfigSpace.CategoricalHyperparameter, 'linear_bias_init',
        linear_bias_init)
    first_activation_hp = get_hyperparameter(
        ConfigSpace.CategoricalHyperparameter, 'first_activation',
        allowed_activations)
    blocks_hp = get_hyperparameter(
        ConfigSpace.UniformIntegerHyperparameter, 'blocks', nr_blocks)

    cs.add_hyperparameter(growth_rate_hp)
    cs.add_hyperparameter(first_conv_kernel_hp)
    cs.add_hyperparameter(first_pool_kernel_hp)
    cs.add_hyperparameter(conv_init_hp)
    cs.add_hyperparameter(batchnorm_weight_init_hp)
    cs.add_hyperparameter(batchnorm_bias_init_hp)
    cs.add_hyperparameter(linear_bias_init_hp)
    cs.add_hyperparameter(first_activation_hp)
    cs.add_hyperparameter(blocks_hp)

    add_hyperparameter(cs, CSH.UniformFloatHyperparameter,
                       'channel_reduction', [0.1, 0.9])
    add_hyperparameter(cs, CSH.UniformFloatHyperparameter,
                       'last_image_size', [0, 1])
    add_hyperparameter(cs, CSH.CategoricalHyperparameter,
                       'bottleneck', [True, False])
    use_dropout = add_hyperparameter(cs, CSH.CategoricalHyperparameter,
                                     'use_dropout', [True, False])

    # `nr_blocks` is either a plain (min, max) tuple or the ((min, max), log)
    # form. BUG FIX: use isinstance instead of `type(x) == int`.
    if isinstance(nr_blocks[0], int):
        min_blocks, max_blocks = nr_blocks[0], nr_blocks[1]
    else:
        min_blocks, max_blocks = nr_blocks[0][0], nr_blocks[0][1]

    for i in range(1, max_blocks + 1):
        layer_hp = get_hyperparameter(
            ConfigSpace.UniformIntegerHyperparameter,
            'layer_in_block_%d' % i, layer_range)
        pool_kernel_hp = get_hyperparameter(
            ConfigSpace.UniformIntegerHyperparameter,
            'pool_kernel_%d' % i, kernel_range)
        activation_hp = get_hyperparameter(
            ConfigSpace.CategoricalHyperparameter,
            'activation_%d' % i, allowed_activations)
        cs.add_hyperparameter(layer_hp)
        cs.add_hyperparameter(pool_kernel_hp)
        cs.add_hyperparameter(activation_hp)
        dropout = add_hyperparameter(cs, CSH.UniformFloatHyperparameter,
                                     'dropout_%d' % i, [0.0, 1.0])
        conv_kernel = add_hyperparameter(cs, CSH.CategoricalHyperparameter,
                                         'conv_kernel_%d' % i, [3, 5, 7])
        # Block i beyond the minimum only exists when enough blocks are
        # sampled; dropout additionally requires use_dropout == True.
        if i > min_blocks:
            cs.add_condition(CS.GreaterThanCondition(layer_hp, blocks_hp, i - 1))
            cs.add_condition(CS.GreaterThanCondition(conv_kernel, blocks_hp, i - 1))
            cs.add_condition(CS.GreaterThanCondition(pool_kernel_hp, blocks_hp, i - 1))
            cs.add_condition(CS.GreaterThanCondition(activation_hp, blocks_hp, i - 1))
            cs.add_condition(CS.AndConjunction(
                CS.EqualsCondition(dropout, use_dropout, True),
                CS.GreaterThanCondition(dropout, blocks_hp, i - 1)))
        else:
            cs.add_condition(CS.EqualsCondition(dropout, use_dropout, True))

    return cs
def get_hyperparameter_search_space(seed):
    """Build a search space for scikit-learn's MLPClassifier pipeline.

    A best-effort space based on the scikit-learn implementation; for
    state-of-the-art performance other packages could be preferred.

    Parameters
    ----------
    seed: int
        Random seed that will be used to sample random configurations

    Returns
    -------
    cs: ConfigSpace.ConfigurationSpace
        The configuration space object
    """
    cs = ConfigSpace.ConfigurationSpace(
        'sklearn.neural_network.MLPClassifier', seed)

    strategy = ConfigSpace.CategoricalHyperparameter(
        name='columntransformer__numeric__imputer__strategy',
        choices=['mean', 'median', 'most_frequent'])
    hidden_layer_sizes = ConfigSpace.UniformIntegerHyperparameter(
        name='mlpclassifier__hidden_layer_sizes',
        lower=32, upper=2048, default_value=2048)
    activation = ConfigSpace.CategoricalHyperparameter(
        name='mlpclassifier__activation',
        choices=['identity', 'logistic', 'tanh', 'relu'],
        default_value='relu')
    solver = ConfigSpace.CategoricalHyperparameter(
        name='mlpclassifier__solver',
        choices=['lbfgs', 'sgd', 'adam'], default_value='adam')
    alpha = ConfigSpace.UniformFloatHyperparameter(
        name='mlpclassifier__alpha',
        lower=1e-5, upper=1e-1, log=True, default_value=1e-4)
    batch_size = ConfigSpace.UniformIntegerHyperparameter(
        name='mlpclassifier__batch_size',
        lower=32, upper=4096, default_value=200)
    learning_rate = ConfigSpace.CategoricalHyperparameter(
        name='mlpclassifier__learning_rate',
        choices=['constant', 'invscaling', 'adaptive'],
        default_value='constant')
    # TODO: Sensible range??
    learning_rate_init = ConfigSpace.UniformFloatHyperparameter(
        name='mlpclassifier__learning_rate_init',
        lower=1e-5, upper=1e-1, log=True, default_value=1e-04)
    power_t = ConfigSpace.UniformFloatHyperparameter(
        name='mlpclassifier__power_t',
        lower=1e-5, upper=1, log=True, default_value=0.5)
    max_iter = ConfigSpace.UniformIntegerHyperparameter(
        name='mlpclassifier__max_iter',
        lower=64, upper=1024, default_value=200)
    shuffle = ConfigSpace.CategoricalHyperparameter(
        name='mlpclassifier__shuffle',
        choices=[True, False], default_value=True)
    # TODO: log-scale?
    tol = ConfigSpace.UniformFloatHyperparameter(
        name='mlpclassifier__tol',
        lower=1e-5, upper=1e-1, default_value=1e-4, log=True)
    momentum = ConfigSpace.UniformFloatHyperparameter(
        name='mlpclassifier__momentum',
        lower=0, upper=1, default_value=0.9)
    nesterovs_momentum = ConfigSpace.CategoricalHyperparameter(
        name='mlpclassifier__nesterovs_momentum',
        choices=[True, False], default_value=True)
    early_stopping = ConfigSpace.CategoricalHyperparameter(
        name='mlpclassifier__early_stopping',
        choices=[True, False], default_value=True)
    validation_fraction = ConfigSpace.UniformFloatHyperparameter(
        name='mlpclassifier__validation_fraction',
        lower=0, upper=1, default_value=0.1)
    beta_1 = ConfigSpace.UniformFloatHyperparameter(
        name='mlpclassifier__beta_1',
        lower=0, upper=1, default_value=0.9)
    beta_2 = ConfigSpace.UniformFloatHyperparameter(
        name='mlpclassifier__beta_2',
        lower=0, upper=1, default_value=0.999)
    n_iter_no_change = ConfigSpace.UniformIntegerHyperparameter(
        name='mlpclassifier__n_iter_no_change',
        lower=1, upper=1024, default_value=200)

    cs.add_hyperparameters([
        strategy, hidden_layer_sizes, activation, solver, alpha, batch_size,
        learning_rate, learning_rate_init, power_t, max_iter, shuffle, tol,
        momentum, nesterovs_momentum, early_stopping, validation_fraction,
        beta_1, beta_2, n_iter_no_change,
    ])

    # Conditions: most SGD/Adam-specific parameters are only active for the
    # matching solver; nesterov momentum also requires a positive momentum.
    nesterovs_momentum_condition = ConfigSpace.AndConjunction(
        ConfigSpace.EqualsCondition(nesterovs_momentum, solver, 'sgd'),
        ConfigSpace.GreaterThanCondition(nesterovs_momentum, momentum, 0))
    all_conditions = [
        ConfigSpace.InCondition(batch_size, solver, ['sgd', 'adam']),
        ConfigSpace.InCondition(learning_rate_init, solver, ['sgd', 'adam']),
        ConfigSpace.EqualsCondition(power_t, solver, 'sgd'),
        ConfigSpace.InCondition(shuffle, solver, ['sgd', 'adam']),
        ConfigSpace.InCondition(tol, learning_rate,
                                ['constant', 'invscaling']),
        ConfigSpace.EqualsCondition(momentum, solver, 'sgd'),
        nesterovs_momentum_condition,
        ConfigSpace.InCondition(early_stopping, solver, ['sgd', 'adam']),
        ConfigSpace.EqualsCondition(validation_fraction, early_stopping, True),
        ConfigSpace.EqualsCondition(beta_1, solver, 'adam'),
        ConfigSpace.EqualsCondition(beta_2, solver, 'adam'),
        ConfigSpace.InCondition(n_iter_no_change, solver, ['sgd', 'adam']),
    ]
    for condition in all_conditions:
        cs.add_condition(condition)

    return cs
# 3) LessThanCondition: # 'd' is only active if 'b' is less than 5 # We do not add this condition here directly, because we will use it later in the 'and-conjunction'. less_cond = CS.LessThanCondition(d, b, 5) # 4) GreaterThanCondition: # 'd' is only active if 'b' is greater than 2 greater_cond = CS.GreaterThanCondition(d, b, 2) # 5) InCondition: # 'e' is only active if 'c' is in the set [25, 26, 27] in_cond = CS.InCondition(e, c, [25, 26, 27]) # 6) AndConjunction: # The 'and-conjunction' combines the conditions less_cond and greater_cond cs.add_condition(CS.AndConjunction(less_cond, greater_cond)) # 7) OrConjunction: # The 'or-conjunction' works similar to the 'and-conjunction' equals_cond = CS.EqualsCondition(e, a, 2) cs.add_condition(CS.OrConjunction(in_cond, equals_cond)) # 8) ForbiddenEqualsClause: # This clause forbids the value 2 for the hyperparameter f forbidden_clause_f = CS.ForbiddenEqualsClause(f, 2) # 9) ForbiddenInClause # This clause forbids the value of the hyperparameter g to be in the set [2] forbidden_clause_g = CS.ForbiddenInClause(g, [2]) # 10) ForbiddenAndConjunction
def configuration_space_from_raw(hpRaw, hpRawConditions, resolve_multiple='AND'):
    """Build a ConfigSpace.ConfigurationSpace from raw tuple descriptions.

    Args:
        hpRaw: iterable of ``(name, range_or_choices, default, log, type)``
            tuples, where type is "float", "int" or "cat".
        hpRawConditions: iterable of ``(child, kind, parent, value)`` tuples,
            where kind is "eq" (EqualsCondition) or "gtr"
            (GreaterThanCondition).
        resolve_multiple: 'AND' or 'OR' — how several conditions on the same
            child hyperparameter are combined.

    Returns:
        The populated ``CS.ConfigurationSpace``.

    Raises:
        Exception: on an unknown hyperparameter type, condition type, or
            ``resolve_multiple`` value.
    """
    cs = CS.ConfigurationSpace()

    # add hyperparameters
    for hp in hpRaw:
        if hp[4] == "float":
            cs.add_hyperparameter(
                CS.UniformFloatHyperparameter(hp[0], lower=hp[1][0],
                                              upper=hp[1][1],
                                              default_value=hp[2],
                                              log=hp[3]))
        elif hp[4] == "int":
            cs.add_hyperparameter(
                CS.UniformIntegerHyperparameter(hp[0], lower=hp[1][0],
                                                upper=hp[1][1],
                                                default_value=hp[2],
                                                log=hp[3]))
        elif hp[4] == "cat":
            cs.add_hyperparameter(CS.CategoricalHyperparameter(hp[0], hp[1]))
        else:
            raise Exception("unknown hp type in hpRawList")

    # Group conditions by their child hyperparameter in one pass (insertion
    # order preserved) instead of re-scanning the whole list for every entry
    # — the previous implementation was O(n^2) and reused the loop variable
    # `cond` for both loops.
    conds_by_child = {}
    for raw_cond in hpRawConditions:
        conds_by_child.setdefault(raw_cond[0], []).append(raw_cond)

    for raw_conds in conds_by_child.values():
        condition_objects = []
        for raw_cond in raw_conds:
            child = cs.get_hyperparameter(raw_cond[0])
            parent = cs.get_hyperparameter(raw_cond[2])
            if raw_cond[1] == "eq":
                condition_objects.append(
                    CS.EqualsCondition(child, parent, raw_cond[3]))
            elif raw_cond[1] == "gtr":
                condition_objects.append(
                    CS.GreaterThanCondition(child, parent, raw_cond[3]))
            else:
                raise Exception("unknown condition type in hpRawConditions")

        # A single condition is added as-is; multiple conditions on the same
        # child are merged according to resolve_multiple.
        if len(condition_objects) == 1:
            cs.add_condition(condition_objects[0])
        elif resolve_multiple == 'AND':
            cs.add_condition(CS.AndConjunction(*condition_objects))
        elif resolve_multiple == 'OR':
            cs.add_condition(CS.OrConjunction(*condition_objects))
        else:
            raise Exception("resolve_multiple=", resolve_multiple,
                            ". should be 'AND' or 'OR'")
    return cs
def get_hyperparameter_search_space(
    dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None,
    num_groups: HyperparameterSearchSpace = HyperparameterSearchSpace(
        hyperparameter="num_groups",
        value_range=(1, 15),
        default_value=5,
    ),
    activation: HyperparameterSearchSpace = HyperparameterSearchSpace(
        hyperparameter="activation",
        value_range=tuple(_activations.keys()),
        default_value=list(_activations.keys())[0],
    ),
    use_dropout: HyperparameterSearchSpace = HyperparameterSearchSpace(
        hyperparameter="use_dropout",
        value_range=(True, False),
        default_value=False,
    ),
    num_units: HyperparameterSearchSpace = HyperparameterSearchSpace(
        hyperparameter="num_units",
        value_range=(10, 1024),
        default_value=200,
    ),
    dropout: HyperparameterSearchSpace = HyperparameterSearchSpace(
        hyperparameter="dropout",
        value_range=(0, 0.8),
        default_value=0.5,
    ),
) -> ConfigurationSpace:
    """Assemble the MLP backbone search space.

    Layer blocks share one architecture and differ only in their number of
    units; dropout is optional and gated on the ``use_dropout`` flag.
    """
    cs = ConfigurationSpace()

    # Depth of the network.
    min_mlp_layers, max_mlp_layers = num_groups.value_range
    num_groups = get_hyperparameter(num_groups, UniformIntegerHyperparameter)

    add_hyperparameter(cs, activation, CategoricalHyperparameter)

    # Optional dropout for better generalization.
    use_dropout = get_hyperparameter(use_dropout, CategoricalHyperparameter)
    cs.add_hyperparameters([num_groups, use_dropout])

    for layer in range(1, int(max_mlp_layers) + 1):
        units_hp = get_hyperparameter(
            HyperparameterSearchSpace(hyperparameter='num_units_%d' % layer,
                                      value_range=num_units.value_range,
                                      default_value=num_units.default_value,
                                      log=num_units.log),
            UniformIntegerHyperparameter)
        cs.add_hyperparameter(units_hp)
        if layer > int(min_mlp_layers):
            # Units of this layer exist only when at least that many layers
            # are sampled.
            cs.add_condition(
                CS.GreaterThanCondition(units_hp, num_groups, layer - 1))

        layer_dropout_hp = get_hyperparameter(
            HyperparameterSearchSpace(hyperparameter='dropout_%d' % layer,
                                      value_range=dropout.value_range,
                                      default_value=dropout.default_value,
                                      log=dropout.log),
            UniformFloatHyperparameter)
        cs.add_hyperparameter(layer_dropout_hp)
        dropout_active = CS.EqualsCondition(layer_dropout_hp, use_dropout,
                                            True)
        if layer > int(min_mlp_layers):
            deep_enough = CS.GreaterThanCondition(layer_dropout_hp,
                                                  num_groups, layer - 1)
            cs.add_condition(CS.AndConjunction(dropout_active, deep_enough))
        else:
            cs.add_condition(dropout_active)
    return cs
def fit_search_space(self, df):
    """Apply prior-guided transfer learning based on a DataFrame of results.

    Rebuilds the problem's configuration space so that each hyperparameter's
    prior is centered/weighted on the best configuration found in ``df``,
    then stores it in ``self._opt_kwargs["dimensions"]``.

    :meta private:

    Args:
        df (str|DataFrame): a checkpoint from a previous search — either a
            path to a ``.csv`` file or an already-loaded DataFrame.
    """
    if type(df) is str and df[-4:] == ".csv":
        df = pd.read_csv(df)
    assert isinstance(df, pd.DataFrame)

    cst = self._problem.space
    if type(cst) != CS.ConfigurationSpace:
        logging.error(f"{type(cst)}: not supported for transfer learning")

    res_df = df
    res_df_names = res_df.columns.values
    # The row with the best observed objective acts as the prior's mode.
    best_index = np.argmax(res_df["objective"].values)
    best_param = res_df.iloc[best_index]

    # Width of the numeric priors (sigma = range / fac_numeric) and weight
    # boost given to the best categorical choice.
    fac_numeric = 8.0
    fac_categorical = 10.0

    cst_new = CS.ConfigurationSpace(seed=1234)
    hp_names = cst.get_hyperparameter_names()
    for hp_name in hp_names:
        hp = cst.get_hyperparameter(hp_name)
        if hp_name in res_df_names:
            if (type(hp) is csh.UniformIntegerHyperparameter
                    or type(hp) is csh.UniformFloatHyperparameter):
                # Replace the uniform prior with a normal prior centered on
                # the best observed value.
                mu = best_param[hp.name]
                lower = hp.lower
                upper = hp.upper
                sigma = max(1.0, (upper - lower) / fac_numeric)
                if type(hp) is csh.UniformIntegerHyperparameter:
                    param_new = csh.NormalIntegerHyperparameter(
                        name=hp.name,
                        default_value=mu,
                        mu=mu,
                        sigma=sigma,
                        lower=lower,
                        upper=upper,
                    )
                else:  # type is csh.UniformFloatHyperparameter
                    param_new = csh.NormalFloatHyperparameter(
                        name=hp.name,
                        default_value=mu,
                        mu=mu,
                        sigma=sigma,
                        lower=lower,
                        upper=upper,
                    )
                cst_new.add_hyperparameter(param_new)
            elif type(hp) is csh.CategoricalHyperparameter:
                # Upweight the best choice, then normalize the weights into
                # a probability distribution.
                choices = hp.choices
                weights = len(hp.choices) * [1.0]
                index = choices.index(best_param[hp.name])
                weights[index] = fac_categorical
                norm_weights = [float(i) / sum(weights) for i in weights]
                param_new = csh.CategoricalHyperparameter(
                    name=hp.name, choices=choices, weights=norm_weights)
                cst_new.add_hyperparameter(param_new)
            else:
                # BUG FIX: these warnings were missing the f-prefix and
                # logged the literal "{hp}".
                logging.warning(
                    f"Not fitting {hp} because it is not supported!")
                cst_new.add_hyperparameter(hp)
        else:
            logging.warning(
                f"Not fitting {hp} because it was not found in the dataframe!"
            )
            cst_new.add_hyperparameter(hp)

    # Copy the conditions over into the new space.
    for cond in cst.get_conditions():
        if type(cond) == CS.AndConjunction or type(cond) == CS.OrConjunction:
            cond_list = [self.return_cond(comp, cst_new)
                         for comp in cond.components]
            if type(cond) is CS.AndConjunction:
                cond_new = CS.AndConjunction(*cond_list)
            else:  # CS.OrConjunction (guaranteed by the outer check)
                cond_new = CS.OrConjunction(*cond_list)
        else:
            cond_new = self.return_cond(cond, cst_new)
        cst_new.add_condition(cond_new)

    # Copy the forbidden clauses over into the new space.
    for forb in cst.get_forbiddens():
        if type(forb) is CS.ForbiddenAndConjunction:
            forb_list = [self.return_forbid(comp, cst_new)
                         for comp in forb.components]
            forb_new = CS.ForbiddenAndConjunction(*forb_list)
        elif (type(forb) is CS.ForbiddenEqualsClause
              or type(forb) is CS.ForbiddenInClause):
            forb_new = self.return_forbid(forb, cst_new)
        else:
            # BUG FIX: previously this fell through and re-added the previous
            # clause (or raised NameError on the first iteration); skip
            # unsupported clauses instead.
            logging.warning(f"Forbidden {type(forb)} is not implemented!")
            continue
        cst_new.add_forbidden_clause(forb_new)

    self._opt_kwargs["dimensions"] = cst_new
def get_hyperparameter_search_space(
    dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None,
    num_groups: HyperparameterSearchSpace = HyperparameterSearchSpace(
        hyperparameter="num_groups",
        value_range=(1, 15),
        default_value=5,
    ),
    use_dropout: HyperparameterSearchSpace = HyperparameterSearchSpace(
        hyperparameter="use_dropout",
        value_range=(True, False),
        default_value=False,
    ),
    num_units: HyperparameterSearchSpace = HyperparameterSearchSpace(
        hyperparameter="num_units",
        value_range=(10, 1024),
        default_value=200,
    ),
    activation: HyperparameterSearchSpace = HyperparameterSearchSpace(
        hyperparameter="activation",
        value_range=tuple(_activations.keys()),
        default_value=list(_activations.keys())[0],
    ),
    blocks_per_group: HyperparameterSearchSpace = HyperparameterSearchSpace(
        hyperparameter="blocks_per_group",
        value_range=(1, 4),
        default_value=2,
    ),
    dropout: HyperparameterSearchSpace = HyperparameterSearchSpace(
        hyperparameter="dropout",
        value_range=(0, 0.8),
        default_value=0.5,
    ),
    use_shake_shake: HyperparameterSearchSpace = HyperparameterSearchSpace(
        hyperparameter="use_shake_shake",
        value_range=(True, False),
        default_value=True,
    ),
    use_shake_drop: HyperparameterSearchSpace = HyperparameterSearchSpace(
        hyperparameter="use_shake_drop",
        value_range=(True, False),
        default_value=True,
    ),
    max_shake_drop_probability: HyperparameterSearchSpace = HyperparameterSearchSpace(
        hyperparameter="max_shake_drop_probability",
        value_range=(0, 1),
        default_value=0.5),
) -> ConfigurationSpace:
    """Assemble the ResNet backbone search space.

    The network is composed of `num_groups` groups, each stacking several
    residual blocks; shake-shake / shake-drop regularization and dropout are
    optional and gated on their boolean flags.
    """
    cs = ConfigurationSpace()

    # `min_num_groups` is unused here: per-group conditions below compare
    # against 1, matching the original implementation.
    min_num_groups, max_num_groups = num_groups.value_range
    num_groups = get_hyperparameter(num_groups, UniformIntegerHyperparameter)
    add_hyperparameter(cs, activation, CategoricalHyperparameter)
    cs.add_hyperparameters([num_groups])

    # Optional dropout for better generalization.
    use_dropout = get_hyperparameter(use_dropout, CategoricalHyperparameter)
    cs.add_hyperparameters([use_dropout])

    use_shake_shake = get_hyperparameter(use_shake_shake,
                                         CategoricalHyperparameter)
    use_shake_drop = get_hyperparameter(use_shake_drop,
                                        CategoricalHyperparameter)
    shake_drop_prob = get_hyperparameter(max_shake_drop_probability,
                                         UniformFloatHyperparameter)
    cs.add_hyperparameters([use_shake_shake, use_shake_drop, shake_drop_prob])
    cs.add_condition(CS.EqualsCondition(shake_drop_prob, use_shake_drop, True))

    # Iterate up to the upper bound of num_groups; the depth actually used
    # is sampled from the 'num_groups' hyperparameter.
    for group in range(0, int(max_num_groups) + 1):
        units_hp = get_hyperparameter(
            HyperparameterSearchSpace(hyperparameter='num_units_%d' % group,
                                      value_range=num_units.value_range,
                                      default_value=num_units.default_value,
                                      log=num_units.log),
            UniformIntegerHyperparameter)
        blocks_hp = get_hyperparameter(
            HyperparameterSearchSpace(
                hyperparameter='blocks_per_group_%d' % group,
                value_range=blocks_per_group.value_range,
                default_value=blocks_per_group.default_value,
                log=blocks_per_group.log),
            UniformIntegerHyperparameter)
        cs.add_hyperparameters([units_hp, blocks_hp])
        if group > 1:
            cs.add_condition(
                CS.GreaterThanCondition(units_hp, num_groups, group - 1))
            cs.add_condition(
                CS.GreaterThanCondition(blocks_hp, num_groups, group - 1))

        group_dropout_hp = get_hyperparameter(
            HyperparameterSearchSpace(hyperparameter='dropout_%d' % group,
                                      value_range=dropout.value_range,
                                      default_value=dropout.default_value,
                                      log=dropout.log),
            UniformFloatHyperparameter)
        cs.add_hyperparameter(group_dropout_hp)
        dropout_active = CS.EqualsCondition(group_dropout_hp, use_dropout,
                                            True)
        if group > 1:
            cs.add_condition(CS.AndConjunction(
                dropout_active,
                CS.GreaterThanCondition(group_dropout_hp, num_groups,
                                        group - 1)))
        else:
            cs.add_condition(dropout_active)
    return cs
def get_config_space(user_updates=None):
    """Create the ResNet configuration space.

    ``user_updates`` may override the 'num_groups' and 'blocks_per_group'
    ranges; everything else is fixed.
    """
    groups_range = (1, 9)
    blocks_range = (1, 4)
    units_range = (10, 1024)
    activations = ('sigmoid', 'tanh', 'relu')
    shake_drop_prob_range = (0, 1)
    dropout_range = (0, 0.8)
    if user_updates is not None:
        groups_range = user_updates.get('num_groups', groups_range)
        blocks_range = user_updates.get('blocks_per_group', blocks_range)

    cs = ConfigSpace.ConfigurationSpace()
    num_groups = ConfigSpace.UniformIntegerHyperparameter(
        "num_groups", lower=groups_range[0], upper=groups_range[1])
    cs.add_hyperparameter(num_groups)
    blocks_per_group = ConfigSpace.UniformIntegerHyperparameter(
        "blocks_per_group", lower=blocks_range[0], upper=blocks_range[1])
    cs.add_hyperparameter(blocks_per_group)
    cs.add_hyperparameter(
        ConfigSpace.CategoricalHyperparameter("activation", activations))
    use_dropout = ConfigSpace.CategoricalHyperparameter(
        "use_dropout", [True, False], default_value=True)
    cs.add_hyperparameter(use_dropout)
    cs.add_hyperparameter(ConfigSpace.CategoricalHyperparameter(
        "use_shake_shake", [True, False], default_value=True))
    use_shake_drop = cs.add_hyperparameter(
        ConfigSpace.CategoricalHyperparameter("use_shake_drop",
                                              [True, False],
                                              default_value=True))
    shake_drop_prob = cs.add_hyperparameter(
        ConfigSpace.UniformFloatHyperparameter(
            "max_shake_drop_probability",
            lower=shake_drop_prob_range[0],
            upper=shake_drop_prob_range[1]))
    cs.add_condition(
        ConfigSpace.EqualsCondition(shake_drop_prob, use_shake_drop, True))

    # Iterate up to the upper bound of num_groups; the depth actually used
    # is sampled from the 'num_groups' hyperparameter.
    for group in range(0, groups_range[1] + 1):
        units = ConfigSpace.UniformIntegerHyperparameter(
            "num_units_%d" % group, lower=units_range[0],
            upper=units_range[1], log=True)
        cs.add_hyperparameter(units)
        group_dropout = ConfigSpace.UniformFloatHyperparameter(
            "dropout_%d" % group, lower=dropout_range[0],
            upper=dropout_range[1])
        cs.add_hyperparameter(group_dropout)
        dropout_active = ConfigSpace.EqualsCondition(group_dropout,
                                                     use_dropout, True)
        if group > 1:
            cs.add_condition(ConfigSpace.GreaterThanCondition(
                units, num_groups, group - 1))
            cs.add_condition(ConfigSpace.AndConjunction(
                dropout_active,
                ConfigSpace.GreaterThanCondition(group_dropout, num_groups,
                                                 group - 1)))
        else:
            cs.add_condition(dropout_active)
    return cs
def get_configspace():
    '''
    Defines the configuration space for the Target Algorithm - the CNN module in this case

    :return: a ConfigSpace object containing the hyperparameters, conditionals and forbidden clauses on them
    '''
    config_space = CS.ConfigurationSpace()

    #########################
    # OPTIMIZER HYPERPARAMS #
    #########################
    alpha = CSH.UniformFloatHyperparameter('learning_rate',
                                           lower=0.00001,
                                           upper=0.1,
                                           default_value=0.001,
                                           log=True)
    opti = CSH.CategoricalHyperparameter('model_optimizer',
                                         choices=['adam', 'adad', 'sgd'],
                                         default_value='sgd')
    amsgrad = CSH.CategoricalHyperparameter('amsgrad',
                                            choices=['True', 'False'],
                                            default_value='False')
    # ^ https://openreview.net/forum?id=ryQu7f-RZ
    sgdmom = CSH.UniformFloatHyperparameter('momentum',
                                            lower=0,
                                            upper=0.99,
                                            default_value=0.90)
    # ^ https://distill.pub/2017/momentum/
    config_space.add_hyperparameters([alpha, opti, amsgrad, sgdmom])

    ###########################
    # OPTIMIZER CONDITIONALS  #
    ###########################
    # amsgrad is adam-specific; momentum is sgd-specific.
    amsgrad_cond = CS.EqualsCondition(amsgrad, opti, 'adam')
    sgdmom_cond = CS.EqualsCondition(sgdmom, opti, 'sgd')
    config_space.add_conditions([amsgrad_cond, sgdmom_cond])

    ########################
    # TRAINING HYPERPARAMS #
    ########################
    batch = CSH.CategoricalHyperparameter(
        'batch_size',
        choices=['50', '100', '200', '500', '1000'],
        default_value='100')
    # ^ https://stats.stackexchange.com/questions/164876/tradeoff-batch-size-vs-number-of-iterations-to-train-a-neural-network
    # ^ https://stats.stackexchange.com/questions/49528/batch-gradient-descent-versus-stochastic-gradient-descent
    config_space.add_hyperparameters([batch])

    ############################
    # ARCHITECTURE HYPERPARAMS #
    ############################
    n_conv_layer = CSH.UniformIntegerHyperparameter('n_conv_layer',
                                                    lower=1,
                                                    upper=3,
                                                    default_value=1,
                                                    log=False)
    n_fc_layer = CSH.UniformIntegerHyperparameter('n_fc_layer',
                                                  lower=1,
                                                  upper=3,
                                                  default_value=1,
                                                  log=False)
    dropout = CSH.CategoricalHyperparameter('dropout',
                                            choices=['True', 'False'],
                                            default_value='False')
    activation = CSH.CategoricalHyperparameter(
        'activation',
        choices=['relu', 'tanh', 'sigmoid'],
        default_value='tanh')
    batchnorm = CSH.CategoricalHyperparameter('batchnorm',
                                              choices=['True', 'False'],
                                              default_value='False')
    config_space.add_hyperparameters(
        [n_conv_layer, n_fc_layer, dropout, activation, batchnorm])

    #
    # LAYER 1 PARAMS
    #
    kernel_1 = CSH.CategoricalHyperparameter('kernel_1',
                                             choices=['3', '5', '7'],
                                             default_value='5')
    channel_1 = CSH.UniformIntegerHyperparameter('channel_1',
                                                 lower=3,
                                                 upper=12,
                                                 default_value=3)
    padding_1 = CSH.UniformIntegerHyperparameter('padding_1',
                                                 lower=0,
                                                 upper=3,
                                                 default_value=2)
    stride_1 = CSH.UniformIntegerHyperparameter('stride_1',
                                                lower=1,
                                                upper=2,
                                                default_value=1)
    maxpool_1 = CSH.CategoricalHyperparameter('maxpool_1',
                                              choices=['True', 'False'],
                                              default_value='True')
    maxpool_kernel_1 = CSH.UniformIntegerHyperparameter('maxpool_kernel_1',
                                                        lower=2,
                                                        upper=6,
                                                        default_value=6)
    config_space.add_hyperparameters([
        kernel_1, padding_1, stride_1, maxpool_1, maxpool_kernel_1, channel_1
    ])

    # LAYER 1 CONDITIONALS
    maxpool_cond_1 = CS.NotEqualsCondition(maxpool_1, stride_1, 2)
    # ^ Convolution with stride 2 is equivalent to Maxpool - https://arxiv.org/abs/1412.6806
    maxpool_kernel_cond_1 = CS.EqualsCondition(maxpool_kernel_1, maxpool_1,
                                               'True')
    config_space.add_conditions([maxpool_cond_1, maxpool_kernel_cond_1])

    # LAYER 1 - RESTRICTING PADDING RANGE
    # Ensuring a padding domain of {0, 1, ..., floor(n/2)} for kernel_size n
    padding_1_cond_0 = CS.ForbiddenAndConjunction(
        CS.ForbiddenEqualsClause(kernel_1, '3'),
        CS.ForbiddenInClause(padding_1, [2, 3]))
    padding_1_cond_1 = CS.ForbiddenAndConjunction(
        CS.ForbiddenEqualsClause(kernel_1, '5'),
        CS.ForbiddenEqualsClause(padding_1, 3))
    config_space.add_forbidden_clauses([padding_1_cond_0, padding_1_cond_1])

    #
    # LAYER 2 PARAMS
    #
    kernel_2 = CSH.CategoricalHyperparameter('kernel_2',
                                             choices=['3', '5', '7'],
                                             default_value='5')
    # Channels for Layer 2 onwards is a multiplicative factor of previous layer's channel size
    channel_2 = CSH.CategoricalHyperparameter('channel_2',
                                              choices=['1', '2', '3', '4'],
                                              default_value='2')
    # ^ Categorical instead of Integer owing to the design choice of channel_3 - for parity's sake
    padding_2 = CSH.UniformIntegerHyperparameter('padding_2',
                                                 lower=0,
                                                 upper=3,
                                                 default_value=2)
    stride_2 = CSH.UniformIntegerHyperparameter('stride_2',
                                                lower=1,
                                                upper=2,
                                                default_value=1)
    maxpool_2 = CSH.CategoricalHyperparameter('maxpool_2',
                                              choices=['True', 'False'],
                                              default_value='True')
    maxpool_kernel_2 = CSH.UniformIntegerHyperparameter('maxpool_kernel_2',
                                                        lower=2,
                                                        upper=6,
                                                        default_value=6)
    config_space.add_hyperparameters([
        kernel_2, padding_2, stride_2, maxpool_2, maxpool_kernel_2, channel_2
    ])

    # LAYER 2 CONDITIONALS (added below inside the layer-activation conjunctions)
    maxpool_cond_2 = CS.NotEqualsCondition(maxpool_2, stride_2, 2)
    # ^ Convolution with stride 2 is equivalent to Maxpool - https://arxiv.org/abs/1412.6806
    maxpool_kernel_cond_2 = CS.EqualsCondition(maxpool_kernel_2, maxpool_2,
                                               'True')

    # LAYER 2 - RESTRICTING PADDING RANGE
    # Ensuring a padding domain of {0, 1, ..., floor(n/2)} for kernel_size n
    padding_2_cond_0 = CS.ForbiddenAndConjunction(
        CS.ForbiddenEqualsClause(kernel_2, '3'),
        CS.ForbiddenInClause(padding_2, [2, 3]))
    padding_2_cond_1 = CS.ForbiddenAndConjunction(
        CS.ForbiddenEqualsClause(kernel_2, '5'),
        CS.ForbiddenEqualsClause(padding_2, 3))
    config_space.add_forbidden_clauses([padding_2_cond_0, padding_2_cond_1])

    # LAYER 2 ACTIVATE CONDITION
    # Layer 2 params will activate optionally only if n_conv_layer >= 2
    kernel_2_cond = CS.InCondition(kernel_2, n_conv_layer, [2, 3])
    channel_2_cond = CS.InCondition(channel_2, n_conv_layer, [2, 3])
    padding_2_cond = CS.InCondition(padding_2, n_conv_layer, [2, 3])
    stride_2_cond = CS.InCondition(stride_2, n_conv_layer, [2, 3])
    maxpool_2_cond = CS.AndConjunction(
        CS.InCondition(maxpool_2, n_conv_layer, [2, 3]), maxpool_cond_2)
    maxpool_kernel_2_cond = CS.AndConjunction(
        CS.InCondition(maxpool_kernel_2, n_conv_layer, [2, 3]),
        maxpool_kernel_cond_2)
    config_space.add_conditions([
        kernel_2_cond, channel_2_cond, padding_2_cond, stride_2_cond,
        maxpool_2_cond, maxpool_kernel_2_cond
    ])

    #
    # LAYER 3 PARAMS
    #
    kernel_3 = CSH.CategoricalHyperparameter('kernel_3',
                                             choices=['1', '3', '5', '7'],
                                             default_value='5')
    # Channels for Layer 2 onwards is a multiplicative factor of previous layer's channel size.
    # Also being the max convolution layer allowed, this allows for 1x1 convolution
    # with a downsampling of channel depth (factor of 0.5) - reduce dimensions along depth.
    channel_3 = CSH.CategoricalHyperparameter(
        'channel_3', choices=['0.5', '1', '2', '3'], default_value='2')
    padding_3 = CSH.UniformIntegerHyperparameter('padding_3',
                                                 lower=0,
                                                 upper=3,
                                                 default_value=2)
    stride_3 = CSH.UniformIntegerHyperparameter('stride_3',
                                                lower=1,
                                                upper=2,
                                                default_value=1)
    maxpool_3 = CSH.CategoricalHyperparameter('maxpool_3',
                                              choices=['True', 'False'],
                                              default_value='True')
    maxpool_kernel_3 = CSH.UniformIntegerHyperparameter('maxpool_kernel_3',
                                                        lower=2,
                                                        upper=6,
                                                        default_value=6)
    config_space.add_hyperparameters([
        kernel_3, padding_3, stride_3, maxpool_3, maxpool_kernel_3, channel_3
    ])

    # LAYER 3 CONDITIONALS (added below inside the layer-activation conjunctions)
    maxpool_cond_3 = CS.NotEqualsCondition(maxpool_3, stride_3, 2)
    maxpool_kernel_cond_3 = CS.EqualsCondition(maxpool_kernel_3, maxpool_3,
                                               'True')

    # LAYER 3 - RESTRICTING PADDING RANGE
    # Ensuring a padding domain of {0, 1, ..., floor(n/2)} for kernel_size n
    padding_3_cond_0 = CS.ForbiddenAndConjunction(
        CS.ForbiddenEqualsClause(kernel_3, '3'),
        CS.ForbiddenInClause(padding_3, [2, 3]))
    padding_3_cond_1 = CS.ForbiddenAndConjunction(
        CS.ForbiddenEqualsClause(kernel_3, '5'),
        CS.ForbiddenEqualsClause(padding_3, 3))
    config_space.add_forbidden_clauses([padding_3_cond_0, padding_3_cond_1])

    # LAYER 3 ACTIVATE CONDITION
    # Layer 3 params will activate optionally only if n_conv_layer >= 3 (max 3 conv layers allowed currently)
    kernel_3_cond = CS.EqualsCondition(kernel_3, n_conv_layer, 3)
    channel_3_cond = CS.EqualsCondition(channel_3, n_conv_layer, 3)
    padding_3_cond = CS.EqualsCondition(padding_3, n_conv_layer, 3)
    stride_3_cond = CS.EqualsCondition(stride_3, n_conv_layer, 3)
    maxpool_3_cond = CS.AndConjunction(
        CS.InCondition(maxpool_3, n_conv_layer, [2, 3]), maxpool_cond_3)
    maxpool_kernel_3_cond = CS.AndConjunction(
        CS.InCondition(maxpool_kernel_3, n_conv_layer, [2, 3]),
        maxpool_kernel_cond_3)
    config_space.add_conditions([
        kernel_3_cond, channel_3_cond, padding_3_cond, stride_3_cond,
        maxpool_3_cond, maxpool_kernel_3_cond
    ])

    # COMPLICATED ASSUMPTIONS MADE EMPIRICALLY TO IMPOSE CONSTRAINTS ON VARIOUS PARAMETERS SUCH THAT THE
    # CONFIGURATIONS SAMPLED BY THE CONFIGURATOR DOESN'T YIELD AN ARCHITECTURE WITH SHAPE/DIMENSION MISMATCH
    # FOLLOWING BASIC ASSUMPTIONS WERE MADE:
    # 1) AT MAX 3 CONVOLUTION LAYERS CAN BE FORMED
    # 2) CONVOLUTION KERNEL SIZE DOMAIN : {3, 5, 7}
    # 3) EACH CONVOLUTION LAYER MAY OR MAY NOT HAVE A MAXPOOL LAYER
    # 4) MAXPOOL KERNEL SIZE DOMAIN : {2, 3, 4, 5, 6}
    # 5) A CONVOLUTION WITH STRIDE 2 IS EQUIVALENT TO MAXPOOL - cannot occur together in same layer
    # MANY OTHER CONDITIONS WERE ADDED BASED ON OBSERVATION (a couple of them mentioned below):
    # 1) If n_conv_layer=3 then cannot have maxpool on all 3 layers
    # 2) Cannot use a convolution kernel of size 5 or 7 in the third layer
    # ...
    for_two_layers_1 = CS.ForbiddenAndConjunction(
        # Disallowing large maxpool kernel in first layer for a 2-layer convolution
        CS.ForbiddenEqualsClause(n_conv_layer, 2),
        CS.ForbiddenInClause(maxpool_kernel_1, [3, 4, 5, 6]),
        CS.ForbiddenInClause(maxpool_kernel_2, [4, 5, 6]))
    for_two_layers_2 = CS.ForbiddenAndConjunction(
        # Disallowing large convolution filter following a large max pool
        CS.ForbiddenInClause(maxpool_kernel_1, [5, 6]),
        CS.ForbiddenInClause(kernel_2, ['5', '7']))
    for_two_layers_3 = CS.ForbiddenAndConjunction(
        # Disallowing large convolution filter following a large max pool
        CS.ForbiddenInClause(kernel_1, ['5', '7']),
        CS.ForbiddenEqualsClause(maxpool_1, 'True'),
        CS.ForbiddenInClause(kernel_2, ['5', '7']))
    for_three_layers_1_0 = CS.ForbiddenAndConjunction(
        # Constraining maxpool kernel sizes for a 3 layer convolution
        # Small maxpool kernel if subsequent layer contains another maxpool
        CS.ForbiddenEqualsClause(n_conv_layer, 3),
        CS.ForbiddenInClause(maxpool_kernel_1, [5, 6]))
    for_three_layers_1_1 = CS.ForbiddenAndConjunction(
        CS.ForbiddenEqualsClause(n_conv_layer, 3),
        CS.ForbiddenInClause(maxpool_kernel_2, [4, 5, 6]))
    for_three_layers_1_2 = CS.ForbiddenAndConjunction(
        CS.ForbiddenEqualsClause(n_conv_layer, 3),
        CS.ForbiddenInClause(maxpool_kernel_3, [3, 4, 5, 6]))
    for_three_layers_2 = CS.ForbiddenAndConjunction(
        # Small maxpool kernel if subsequent layer contains another maxpool
        CS.ForbiddenEqualsClause(n_conv_layer, 3),
        CS.ForbiddenInClause(maxpool_kernel_1, [3, 4, 5, 6]),
        CS.ForbiddenInClause(maxpool_kernel_3, [5, 6]))
    for_three_layers_3 = CS.ForbiddenAndConjunction(
        # Constraining maxpool kernel sizes for a 3 layer convolution
        CS.ForbiddenEqualsClause(n_conv_layer, 3),
        CS.ForbiddenInClause(maxpool_kernel_2, [3, 4, 5, 6]),
        CS.ForbiddenInClause(maxpool_kernel_3, [5, 6]))
    for_three_layers_4 = CS.ForbiddenAndConjunction(
        # Constraining maxpool kernel sizes for a 3 layer convolution
        CS.ForbiddenEqualsClause(n_conv_layer, 3),
        CS.ForbiddenEqualsClause(stride_2, 2),
        CS.ForbiddenInClause(maxpool_kernel_3, [5, 6]))
    for_three_layers_5 = CS.ForbiddenAndConjunction(
        # Disallowing large maxpool after two stride-2 convolutions
        CS.ForbiddenEqualsClause(n_conv_layer, 3),
        CS.ForbiddenEqualsClause(stride_1, 2),
        CS.ForbiddenEqualsClause(stride_2, 2),
        CS.ForbiddenInClause(maxpool_kernel_3, [3, 4, 5, 6]))
    for_three_layers_6 = CS.ForbiddenAndConjunction(
        # Disallowing large convolution filter following a large max pool
        CS.ForbiddenInClause(maxpool_kernel_2, [4, 5, 6]),
        CS.ForbiddenInClause(kernel_3, ['5', '7']))
    for_three_layers_7 = CS.ForbiddenAndConjunction(
        # Doesn't allow 3 consecutive maxpools with a large convolution mask in 3rd layer
        CS.ForbiddenEqualsClause(maxpool_1, 'True'),
        CS.ForbiddenEqualsClause(maxpool_2, 'True'),
        CS.ForbiddenInClause(kernel_3, ['3', '5', '7']),
        CS.ForbiddenEqualsClause(maxpool_3, 'True'))
    for_three_layers_8 = CS.ForbiddenAndConjunction(
        # Same as above, but stride=2 in place of maxpooling
        CS.ForbiddenEqualsClause(stride_1, 2),
        CS.ForbiddenEqualsClause(stride_2, 2),
        CS.ForbiddenInClause(kernel_3, ['3', '5', '7']),
        CS.ForbiddenEqualsClause(stride_3, 2))
    for_three_layers_9 = CS.ForbiddenAndConjunction(
        # Allow a multiplication factor of only 0.5 for a 1x1 convolution
        # in third layer, and no padding
        CS.ForbiddenInClause(kernel_3, ['3', '5', '7']),
        CS.ForbiddenInClause(channel_3, ['0.5']),
        CS.ForbiddenInClause(padding_3, [1, 2, 3]))
    for_three_layers_10 = CS.ForbiddenAndConjunction(
        # Allow a multiplication factor of only 0.5 for a 1x1 convolution
        # in third layer, and no padding
        CS.ForbiddenEqualsClause(kernel_3, '1'),
        CS.ForbiddenInClause(channel_3, ['1', '2', '3']),
        CS.ForbiddenInClause(padding_3, [1, 2, 3]))
    for_three_layers_11 = CS.ForbiddenAndConjunction(
        # Disallowing large convolution filter following a large max pool
        CS.ForbiddenInClause(kernel_2, ['5', '7']),
        CS.ForbiddenEqualsClause(maxpool_2, 'True'),
        CS.ForbiddenInClause(kernel_3, ['5', '7']))
    for_three_layers_12 = CS.ForbiddenAndConjunction(
        # Disallowing large convolution filter following a large max pool
        CS.ForbiddenInClause(kernel_2, ['5', '7']),
        CS.ForbiddenEqualsClause(maxpool_1, 'True'),
        CS.ForbiddenInClause(kernel_3, ['5', '7']))
    # BUG FIX: the list previously contained for_two_layers_2 twice and
    # omitted for_two_layers_3, so that clause was defined but never enforced.
    config_space.add_forbidden_clauses([
        for_two_layers_1, for_two_layers_2, for_two_layers_3,
        for_three_layers_1_0, for_three_layers_1_1, for_three_layers_1_2,
        for_three_layers_2, for_three_layers_3, for_three_layers_4,
        for_three_layers_5, for_three_layers_6, for_three_layers_7,
        for_three_layers_8, for_three_layers_9, for_three_layers_10,
        for_three_layers_11, for_three_layers_12
    ])

    # Forbidding a large convolution mask in the last layers
    last_layer_mask_1 = CS.ForbiddenAndConjunction(
        CS.ForbiddenEqualsClause(n_conv_layer, 3),
        CS.ForbiddenEqualsClause(kernel_3, '7'))
    last_layer_mask_2 = CS.ForbiddenAndConjunction(
        CS.ForbiddenEqualsClause(n_conv_layer, 2),
        CS.ForbiddenEqualsClause(kernel_2, '7'))
    config_space.add_forbidden_clauses(
        [last_layer_mask_1, last_layer_mask_2])

    # INTERMEDIATE FULLY CONNECTED LAYER PARAMS AND CONDITIONS (NOT OUTPUT LAYER)
    # Choosing min size as height/width of image, max as height x width of image
    # Max of 784 >>> output of the convolutions and pooling, hence adequately expressed
    fc1 = CSH.UniformIntegerHyperparameter('fc_1',
                                           lower=28,
                                           upper=784,
                                           default_value=500,
                                           log=True)
    fc2 = CSH.UniformIntegerHyperparameter('fc_2',
                                           lower=28,
                                           upper=784,
                                           default_value=500,
                                           log=True)
    config_space.add_hyperparameters([fc1, fc2])
    # FC layers exist only if n_fc_layer > 1
    fc1_cond = CS.InCondition(fc1, n_fc_layer, [2, 3])
    fc2_cond = CS.EqualsCondition(fc2, n_fc_layer, 3)
    config_space.add_conditions([fc1_cond, fc2_cond])

    return config_space
def fit_search_space(self, df, fac_numerical=0.125, fac_categorical=10):
    """Apply prior-guided transfer learning based on a DataFrame of results.

    Example Usage:

    >>> search = CBO(problem, evaluator)
    >>> search.fit_surrogate("results.csv")

    Args:
        df (str|DataFrame): a checkpoint from a previous search.
        fac_numerical (float): the factor used to compute the sigma of a
            truncated normal distribution based on
            ``sigma = max(1.0, (upper - lower) * fac_numerical)``. A large
            factor increases exploration while a small factor increases
            exploitation around the best configuration from the ``df``
            parameter.
        fac_categorical (float): the weight given to a categorical feature
            part of the best configuration. A large weight ``> 1`` increases
            exploitation while a small factor close to ``1`` increases
            exploration.
    """
    # Load the checkpoint when a CSV path is given instead of a DataFrame.
    if type(df) is str and df[-4:] == ".csv":
        df = pd.read_csv(df)
    assert isinstance(df, pd.DataFrame)

    # Check single or multiple objectives; failed evaluations are encoded
    # as strings starting with "F" and are filtered out.
    if "objective" in df.columns:
        if pd.api.types.is_string_dtype(df.objective):
            df = df[~df.objective.str.startswith("F")]
            df.objective = df.objective.astype(float)
    else:
        objcol = df.filter(regex=r"^objective_\d+$").columns
        for col in objcol:
            if pd.api.types.is_string_dtype(df[col]):
                df = df[~df[col].str.startswith("F")]
                df[col] = df[col].astype(float)

    cst = self._problem.space
    if type(cst) != CS.ConfigurationSpace:
        # BUG FIX: "trainsfer" -> "transfer" in the log message.
        # NOTE(review): this only logs and then continues with the
        # unsupported space — confirm whether it should raise instead.
        logging.error(f"{type(cst)}: not supported for transfer learning")

    res_df = df
    res_df_names = res_df.columns.values
    # Single objective: the best row maximizes "objective".
    # Multi-objective: take the first index of the non-dominated set.
    if "objective" in df.columns:
        best_index = np.argmax(res_df["objective"].values)
        best_param = res_df.iloc[best_index]
    else:
        best_index = non_dominated_set(-np.asarray(res_df[objcol]),
                                       return_mask=False)[0]
        best_param = res_df.iloc[best_index]

    cst_new = CS.ConfigurationSpace(
        seed=self._random_state.randint(0, 2**32))
    hp_names = cst.get_hyperparameter_names()
    for hp_name in hp_names:
        hp = cst.get_hyperparameter(hp_name)
        if hp_name not in res_df_names:
            logging.warning(
                f"Not fitting {hp} because it was not found in the dataframe!"
            )
            cst_new.add_hyperparameter(hp)
            continue
        if (type(hp) is csh.UniformIntegerHyperparameter
                or type(hp) is csh.UniformFloatHyperparameter):
            # Replace the uniform prior with a truncated normal centered
            # on the best value found so far.
            mu = best_param[hp.name]
            lower = hp.lower
            upper = hp.upper
            sigma = max(1.0, (upper - lower) * fac_numerical)
            if type(hp) is csh.UniformIntegerHyperparameter:
                param_new = csh.NormalIntegerHyperparameter(
                    name=hp.name,
                    default_value=mu,
                    mu=mu,
                    sigma=sigma,
                    lower=lower,
                    upper=upper,
                )
            else:  # type is csh.UniformFloatHyperparameter
                param_new = csh.NormalFloatHyperparameter(
                    name=hp.name,
                    default_value=mu,
                    mu=mu,
                    sigma=sigma,
                    lower=lower,
                    upper=upper,
                )
            cst_new.add_hyperparameter(param_new)
        elif (type(hp) is csh.CategoricalHyperparameter
              or type(hp) is csh.OrdinalHyperparameter):
            # Up-weight the best choice; weights are normalized to sum to 1.
            if type(hp) is csh.OrdinalHyperparameter:
                choices = hp.sequence
            else:
                choices = hp.choices
            weights = len(choices) * [1.0]
            index = choices.index(best_param[hp.name])
            weights[index] = fac_categorical
            norm_weights = [float(w) / sum(weights) for w in weights]
            param_new = csh.CategoricalHyperparameter(
                name=hp.name, choices=choices, weights=norm_weights)
            cst_new.add_hyperparameter(param_new)
        else:
            logging.warning(
                f"Not fitting {hp} because it is not supported!")
            cst_new.add_hyperparameter(hp)

    # Re-create the conditions on the new space.
    for cond in cst.get_conditions():
        if type(cond) is CS.AndConjunction or type(cond) is CS.OrConjunction:
            cond_list = [
                self._return_cond(comp, cst_new) for comp in cond.components
            ]
            if type(cond) is CS.AndConjunction:
                cond_new = CS.AndConjunction(*cond_list)
            else:
                cond_new = CS.OrConjunction(*cond_list)
        else:
            cond_new = self._return_cond(cond, cst_new)
        cst_new.add_condition(cond_new)

    # Re-create the forbidden clauses on the new space.
    for forbid in cst.get_forbiddens():
        if type(forbid) is CS.ForbiddenAndConjunction:
            forbid_list = [
                self._return_forbid(comp, cst_new)
                for comp in forbid.components
            ]
            forbid_new = CS.ForbiddenAndConjunction(*forbid_list)
        elif (type(forbid) is CS.ForbiddenEqualsClause
              or type(forbid) is CS.ForbiddenInClause):
            forbid_new = self._return_forbid(forbid, cst_new)
        else:
            # BUG FIX: previously this branch fell through to
            # add_forbidden_clause with an unbound (or stale) clause
            # variable; unsupported clauses are now skipped.
            logging.warning(f"Forbidden {type(forbid)} is not implemented!")
            continue
        cst_new.add_forbidden_clause(forbid_new)

    self._opt_kwargs["dimensions"] = cst_new
def get_hyperparameter_search_space(dataset_properties: Optional[Dict] = None,
                                    min_mlp_layers: int = 1,
                                    max_mlp_layers: int = 15,
                                    dropout: bool = True,
                                    min_num_units: int = 10,
                                    max_num_units: int = 1024,
                                    ) -> ConfigurationSpace:
    """Assemble the MLP hyperparameter search space.

    Args:
        dataset_properties: unused here; kept for interface compatibility.
        min_mlp_layers: smallest number of hidden layers.
        max_mlp_layers: largest number of hidden layers.
        dropout: whether dropout hyperparameters are part of the space.
        min_num_units: lower bound on units per layer.
        max_num_units: upper bound on units per layer.

    Returns:
        A ConfigurationSpace with one ``num_units_i`` (and, when enabled,
        ``dropout_i``) hyperparameter per possible layer, each gated on the
        sampled ``num_groups``.
    """
    cs = ConfigurationSpace()

    # Number of hidden layers; layer blocks share the same architecture and
    # differ only in their number of units.
    num_groups = UniformIntegerHyperparameter(
        "num_groups", min_mlp_layers, max_mlp_layers, default_value=5)
    activation = CategoricalHyperparameter(
        "activation", choices=list(_activations.keys()))
    cs.add_hyperparameters([num_groups, activation])

    # Dropout can be toggled for better generalization.
    if dropout:
        use_dropout = CategoricalHyperparameter(
            "use_dropout", choices=[True, False])
        cs.add_hyperparameters([use_dropout])

    for layer in range(1, max_mlp_layers + 1):
        units = UniformIntegerHyperparameter("num_units_%d" % layer,
                                             lower=min_num_units,
                                             upper=max_num_units,
                                             default_value=20)
        cs.add_hyperparameter(units)
        layer_is_optional = layer > min_mlp_layers
        if layer_is_optional:
            # The units of layer i only exist when at least i layers are
            # sampled.
            cs.add_condition(
                CS.GreaterThanCondition(units, num_groups, layer - 1))
        if not dropout:
            continue
        layer_dropout = UniformFloatHyperparameter(
            "dropout_%d" % layer, lower=0.0, upper=0.8, default_value=0.5)
        cs.add_hyperparameter(layer_dropout)
        dropout_enabled = CS.EqualsCondition(
            layer_dropout, use_dropout, True)
        if layer_is_optional:
            layer_exists = CS.GreaterThanCondition(
                layer_dropout, num_groups, layer - 1)
            cs.add_condition(
                CS.AndConjunction(dropout_enabled, layer_exists))
        else:
            cs.add_condition(dropout_enabled)
    return cs
def get_hyperparameter_search_space(dataset_properties: Optional[Dict] = None,
                                    min_num_gropus: int = 1,
                                    max_num_groups: int = 9,
                                    min_blocks_per_groups: int = 1,
                                    max_blocks_per_groups: int = 4,
                                    min_num_units: int = 10,
                                    max_num_units: int = 1024,
                                    ) -> ConfigurationSpace:
    """Assemble the residual-network hyperparameter search space.

    NOTE(review): the ``min_num_gropus`` / ``*_per_groups`` misspellings are
    kept as-is — they are part of the public keyword interface.

    Args:
        dataset_properties: unused here; kept for interface compatibility.
        min_num_gropus: smallest number of resblock groups.
        max_num_groups: largest number of resblock groups.
        min_blocks_per_groups: lower bound on resblocks per group.
        max_blocks_per_groups: upper bound on resblocks per group.
        min_num_units: lower bound on units per group.
        max_num_units: upper bound on units per group.

    Returns:
        A ConfigurationSpace with per-group units/blocks/dropout
        hyperparameters gated on the sampled ``num_groups``.
    """
    cs = ConfigurationSpace()

    # num_groups counts the groups composing the resnet; each group holds a
    # (sampled) number of residual blocks.
    num_groups = UniformIntegerHyperparameter("num_groups",
                                              lower=min_num_gropus,
                                              upper=max_num_groups,
                                              default_value=5)
    activation = CategoricalHyperparameter(
        "activation", choices=list(_activations.keys()))
    cs.add_hyperparameters([num_groups, activation])

    # Dropout can be toggled for better generalization.
    use_dropout = CategoricalHyperparameter(
        "use_dropout", choices=[True, False])
    cs.add_hyperparameters([use_dropout])

    use_shake_shake = CategoricalHyperparameter(
        "use_shake_shake", choices=[True, False])
    use_shake_drop = CategoricalHyperparameter(
        "use_shake_drop", choices=[True, False])
    shake_drop_prob = UniformFloatHyperparameter(
        "max_shake_drop_probability", lower=0.0, upper=1.0)
    cs.add_hyperparameters(
        [use_shake_shake, use_shake_drop, shake_drop_prob])
    # The drop probability is only meaningful when shake-drop is enabled.
    cs.add_condition(
        CS.EqualsCondition(shake_drop_prob, use_shake_drop, True))

    # Loop to the upper bound of num_groups: the effective group count is
    # itself sampled from the configuration.
    for group in range(0, max_num_groups + 1):
        units = UniformIntegerHyperparameter(
            "num_units_%d" % group,
            lower=min_num_units,
            upper=max_num_units,
        )
        blocks = UniformIntegerHyperparameter(
            "blocks_per_group_%d" % group,
            lower=min_blocks_per_groups,
            upper=max_blocks_per_groups)
        cs.add_hyperparameters([units, blocks])
        if group > 1:
            # Per-group parameters only exist when enough groups are
            # sampled.
            cs.add_condition(
                CS.GreaterThanCondition(units, num_groups, group - 1))
            cs.add_condition(
                CS.GreaterThanCondition(blocks, num_groups, group - 1))

        group_dropout = UniformFloatHyperparameter(
            "dropout_%d" % group, lower=0.0, upper=1.0)
        cs.add_hyperparameters([group_dropout])
        dropout_enabled = CS.EqualsCondition(
            group_dropout, use_dropout, True)
        if group > 1:
            group_exists = CS.GreaterThanCondition(
                group_dropout, num_groups, group - 1)
            cs.add_condition(
                CS.AndConjunction(dropout_enabled, group_exists))
        else:
            cs.add_condition(dropout_enabled)
    return cs