def test_uniform(self):
    X = [
        {'t1': 1.1, 't2': 0.01, 't3': 3.5, 't4': 'a'},
        {'t1': 4, 't2': 0.001, 't3': 6.2, 't4': 'b'}
    ]
    y = [0.5, 0.6]
    c1 = HyperParameter(ParamTypes.INT, [1, 5])
    c2 = HyperParameter(ParamTypes.FLOAT_EXP, [0.0001, 0.1])
    c3 = HyperParameter(ParamTypes.FLOAT, [2, 8])
    c4 = HyperParameter(ParamTypes.STRING, ['a', 'b', 'c'])
    tunables = [('t1', c1), ('t2', c2), ('t3', c3), ('t4', c4)]

    u = Uniform(tunables)
    u.add(X, y)
    u.add({'t1': 3.5, 't2': 0.1, 't3': 3.2, 't4': 'a'}, 0.8)

    for i in range(100):
        proposed = u.propose()
        self.assertTrue(proposed['t1'] >= 1 and proposed['t1'] <= 5)
        self.assertTrue(proposed['t2'] >= 0.0001 and proposed['t2'] <= 0.1)
        self.assertTrue(proposed['t3'] >= 2 and proposed['t3'] <= 8)
        self.assertTrue(proposed['t4'] in ['a', 'b', 'c'])

    multi_proposed = u.propose(10)
    for proposed in multi_proposed:
        self.assertTrue(proposed['t1'] >= 1 and proposed['t1'] <= 5)
        self.assertTrue(proposed['t2'] >= 0.0001 and proposed['t2'] <= 0.1)
        self.assertTrue(proposed['t3'] >= 2 and proposed['t3'] <= 8)
        self.assertTrue(proposed['t4'] in ['a', 'b', 'c'])
def test_gcpeivelocity(self):
    X = [{'a': 1.1, 'b': 0.01, 'c': 3.5}, {'a': 4, 'b': 0.001, 'c': 6.2}]
    y = [0.5, 0.6]
    c1 = HyperParameter(ParamTypes.INT, [1, 5])
    c2 = HyperParameter(ParamTypes.FLOAT_EXP, [0.0001, 0.1])
    c3 = HyperParameter(ParamTypes.FLOAT, [2, 8])
    tunables = [('a', c1), ('b', c2), ('c', c3)]

    u = GCPEiVelocity(tunables)
    u.add(X, y)

    proposed = u.propose()
    self.assertTrue(proposed['a'] >= 1 and proposed['a'] <= 5)
    self.assertTrue(proposed['b'] >= 0.0001 and proposed['b'] <= 0.1)
    self.assertTrue(proposed['c'] >= 2 and proposed['c'] <= 8)
def extract_pipeline_tunables(pipeline):
    tunable_hyperparameters = defaultdict(dict)
    for step, step_hyperparams in enumerate(pipeline.get_free_hyperparams()):
        for name, hyperparam in step_hyperparams.items():
            if TUNING_PARAMETER not in hyperparam.semantic_types:
                continue

            if isinstance(hyperparam, Union):
                hyperparam = hyperparam.default_hyperparameter

            try:
                param_type = hyperparam.structural_type.__name__
                param_type = 'string' if param_type == 'str' else param_type
                if param_type == 'bool':
                    param_range = [True, False]
                elif hasattr(hyperparam, 'values'):
                    param_range = hyperparam.values
                else:
                    lower = hyperparam.lower
                    upper = hyperparam.upper
                    if upper is None:
                        upper = lower + 1000
                    elif upper > lower:
                        if param_type == 'int':
                            upper = upper - 1
                        elif param_type == 'float':
                            upper = upper - 0.0001

                    param_range = [lower, upper]

            except AttributeError:
                LOGGER.warning('Warning! skipping: %s, %s, %s', step, name, hyperparam)
                continue

            try:
                # Health-Check: some configurations make HyperParameter crash
                HyperParameter(param_type, param_range)

                # If the line above did not crash, we are safe
                tunable_hyperparameters[step][name] = {
                    'type': param_type,
                    'range': param_range,
                    'default': hyperparam.get_default()
                }
            except OverflowError:
                LOGGER.warning('Warning! Overflow: %s, %s, %s', step, name, hyperparam)
                continue

    return tunable_hyperparameters
def __init__(self, config):
    """
    config: name of a JSON config file (under CONFIG_PATH) containing all the
        information needed to specify this enumerator
    """
    with open(join(CONFIG_PATH, config)) as f:
        config = json.load(f)

    self.name = config['name']
    self.conditions = config['conditions']
    self.root_params = config['root_parameters']
    self.class_path = config['class']

    # create hyperparameters from the parameter config
    self.parameters = {
        k: HyperParameter(typ=v['type'], rang=v['range'])
        for k, v in config['parameters'].items()
    }
def _create_tuner(self, pipeline):
    # Build an MLPipeline to get the tunables and the default params
    mlpipeline = MLPipeline.from_dict(self.template_dict)
    tunable_hyperparameters = mlpipeline.get_tunable_hyperparameters()

    tunables = []
    tunable_keys = []
    for block_name, params in tunable_hyperparameters.items():
        for param_name, param_details in params.items():
            key = (block_name, param_name)
            param_type = param_details['type']
            param_type = PARAM_TYPES.get(param_type, param_type)
            if param_type == 'bool':
                param_range = [True, False]
            else:
                param_range = param_details.get('range') or param_details.get('values')

            value = HyperParameter(param_type, param_range)
            tunables.append((key, value))
            tunable_keys.append(key)

    # Create the tuner
    LOGGER.info('Creating %s tuner', self._tuner_class.__name__)
    self.tuner = self._tuner_class(tunables)

    if pipeline:
        try:
            # Add the default params and the score obtained by them to the tuner
            default_params = defaultdict(dict)
            for block_name, params in pipeline.pipeline.get_hyperparameters().items():
                for param, value in params.items():
                    key = (block_name, param)
                    if key in tunable_keys:
                        if value is None:
                            raise ValueError('None value is not supported')

                        default_params[key] = value

            if pipeline.rank is not None:
                self.tuner.add(default_params, 1 - pipeline.rank)

        except ValueError:
            pass
def get_tunables(hyperparameters):
    tunables = list()
    for block_name, params in hyperparameters.items():
        for param_name, param_details in params.items():
            key = (block_name, param_name)
            param_type = param_details['type']
            param_type = 'string' if param_type == 'str' else param_type
            if param_type == 'bool':
                param_range = [True, False]
            else:
                param_range = param_details.get('range') or param_details.get('values')

            value = HyperParameter(param_type, param_range)
            tunables.append((key, value))

    return tunables
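# Hedged usage sketch for get_tunables; the block name and hyperparameter entries
# below are illustrative, not taken from the source.
from btb.tuning import GP

example_hyperparameters = {
    'sklearn.ensemble.RandomForestClassifier#1': {
        'n_estimators': {'type': 'int', 'range': [10, 500], 'default': 100},
        'criterion': {'type': 'str', 'values': ['gini', 'entropy'], 'default': 'gini'},
    },
}
# The resulting (key, HyperParameter) pairs can be passed to any BTB tuner
tuner = GP(get_tunables(example_hyperparameters))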
def _get_tunables(self):
    tunables = []
    tunable_keys = []
    for block_name, params in self._pipeline.get_tunable_hyperparameters().items():
        for param_name, param_details in params.items():
            key = (block_name, param_name)
            param_type = param_details['type']
            param_type = 'string' if param_type == 'str' else param_type
            if param_type == 'bool':
                param_range = [True, False]
            else:
                param_range = param_details.get('range') or param_details.get('values')

            value = HyperParameter(param_type, param_range)
            tunables.append((key, value))
            tunable_keys.append(key)

    return tunables, tunable_keys
def __init__(self, eval_train_path: str, eval_test_path: str):
    super(HyperparameterSearchGym, self).__init__()
    self.train_word_pairs, self.train_similarity = fasttexteval.load_eval_data(eval_train_path)
    self.test_word_pairs, self.test_similarity = fasttexteval.load_eval_data(eval_test_path)

    tunables = [
        ('lr', HyperParameter(ParamTypes.FLOAT, [0.001, 0.1])),
        ('dim', HyperParameter(ParamTypes.INT, [50, 350])),
        ('ws', HyperParameter(ParamTypes.INT, [3, 11])),
        ('epoch', HyperParameter(ParamTypes.INT, [3, 11])),
        ('minn', HyperParameter(ParamTypes.INT, [2, 5])),
        ('maxn', HyperParameter(ParamTypes.INT, [5, 9])),
        ('loss', HyperParameter(ParamTypes.STRING, ['ns', 'hs'])),
    ]
    self.tuner = GP(tunables)
def _get_tuner(self, pipeline, template_dict):
    # Build an MLPipeline to get the tunables and the default params
    mlpipeline = MLPipeline.from_dict(template_dict)

    tunables = []
    tunable_keys = []
    for block_name, params in mlpipeline.get_tunable_hyperparameters().items():
        for param_name, param_details in params.items():
            key = (block_name, param_name)
            param_type = param_details['type']
            param_type = PARAM_TYPES.get(param_type, param_type)
            if param_type == 'bool':
                param_range = [True, False]
            else:
                param_range = param_details.get('range') or param_details.get('values')

            value = HyperParameter(param_type, param_range)
            tunables.append((key, value))
            tunable_keys.append(key)

    # Create the tuner
    LOGGER.info('Creating %s tuner', self._tuner_class.__name__)
    tuner = self._tuner_class(tunables)

    if pipeline:
        # Add the default params and the score obtained by the default pipeline to the tuner
        default_params = defaultdict(dict)
        for block_name, params in pipeline.pipeline.get_hyperparameters().items():
            for param, value in params.items():
                key = (block_name, param)
                if key in tunable_keys:
                    # default_params[key] = 'None' if value is None else value
                    default_params[key] = value

        tuner.add(default_params, 1 - pipeline.rank)

    return tuner
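# Hedged sketch (not in the source) of the loop that typically consumes the tuner
# returned by _get_tuner; evaluate_pipeline and n_iterations are hypothetical placeholders.
def tuning_loop(tuner, evaluate_pipeline, n_iterations=10):
    for _ in range(n_iterations):
        proposal = tuner.propose()            # {(block_name, param_name): value, ...}
        score = evaluate_pipeline(proposal)   # e.g. set the hyperparameters and cross-validate
        tuner.add(proposal, score)            # feed the score back so the tuner can refine
    return tuner._best_hyperparams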
def setUp(self):
    self.tunables = [
        ('t1', HyperParameter(ParamTypes.INT, [1, 3])),
        ('t2', HyperParameter(ParamTypes.INT_EXP, [10, 10000])),
        ('t3', HyperParameter(ParamTypes.FLOAT, [1.5, 3.2])),
        ('t4', HyperParameter(ParamTypes.FLOAT_EXP, [0.001, 100])),
        ('t5', HyperParameter(ParamTypes.FLOAT_CAT, [0.1, 0.6, 0.5])),
        ('t6', HyperParameter(ParamTypes.BOOL, [True, False])),
        ('t7', HyperParameter(ParamTypes.STRING, ['a', 'b', 'c'])),
    ]

    self.X = [
        {'t1': 2, 't2': 1000, 't3': 3.0, 't4': 0.1, 't5': 0.5, 't6': True, 't7': 'a'},
        {'t1': 1, 't2': 100, 't3': 1.9, 't4': 0.1, 't5': 0.6, 't6': True, 't7': 'b'},
        {'t1': 3, 't2': 10, 't3': 2.6, 't4': 0.01, 't5': 0.1, 't6': False, 't7': 'c'},
    ]
    self.Y = [0.5, 0.6, 0.1]
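# Hedged sketch (not part of the original test suite) of how a test method would
# typically exercise the fixtures defined in setUp with a BTB tuner such as GP:
def test_gp_propose_sketch(self):
    from btb.tuning import GP

    tuner = GP(self.tunables)
    tuner.add(self.X, self.Y)      # seed the tuner with the recorded trials
    proposed = tuner.propose()     # one value per tunable, e.g. proposed['t1']
    self.assertIn(proposed['t7'], ['a', 'b', 'c'])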
def __init__(self, config):
    """
    config: name of a JSON config file (under CONFIG_PATH) containing all the
        information needed to specify this enumerator
    """
    with open(join(CONFIG_PATH, config)) as f:
        config = json.load(f)

    self.name = config['name']
    self.conditions = config['conditions']
    self.root_params = config['root_parameters']

    # import the method's python class
    path = config['class'].split('.')
    mod_str, cls_str = '.'.join(path[:-1]), path[-1]
    mod = import_module(mod_str)
    self.class_ = getattr(mod, cls_str)

    # create hyperparameters from the parameter config
    self.parameters = {
        k: HyperParameter(typ=v['type'], rang=v['range'])
        for k, v in config['parameters'].items()
    }
            verbose=False,
        )
        model.fit(X, y)
        predicted = model.predict(X_test)
        score = accuracy_score(predicted, y_test)

        # record hyper-param combination and score for tuning
        tuner.add(params, score)

    print("Final score:", tuner._best_score)


print("Loading MNIST Data")
mnist = fetch_mldata('MNIST original')
X, X_test, y, y_test = train_test_split(
    mnist.data,
    mnist.target,
    train_size=1000,
    test_size=300,
)

# parameters of RandomForestClassifier we wish to tune and their ranges
tunables = [
    ('n_estimators', HyperParameter(ParamTypes.INT, [10, 500])),
    ('max_depth', HyperParameter(ParamTypes.INT, [3, 20])),
]

print("-------Tuning with a Uniform Tuner-------")
tuner = Uniform(tunables)
tune_random_forest(tuner, X, y, X_test, y_test)

print("-------Tuning with a GP Tuner-------")
tuner = GP(tunables)
tune_random_forest(tuner, X, y, X_test, y_test)
def __init__(self):
    super(HyperparameterSearchGym, self).__init__()

    generic_params = [
        ('lr', HyperParameter(ParamTypes.FLOAT, [0.001, 0.01])),
        ('decay_rate', HyperParameter(ParamTypes.FLOAT, [0.01, 0.1])),
        ('embeddings_size', HyperParameter(ParamTypes.INT, [4, 24])),
        ('dense_output_units', HyperParameter(ParamTypes.INT, [16, 256])),
        ('batch_size', HyperParameter(ParamTypes.INT, [4, 128])),
        ('dropout', HyperParameter(ParamTypes.FLOAT, [0., 0.6])),
        ('use_crf', HyperParameter(ParamTypes.BOOL, [True, False])),
    ]

    # CNN Models
    cnn3 = [
        ('kernel_sizes-1', HyperParameter(ParamTypes.INT, [3, 7])),
        ('kernel_sizes-2', HyperParameter(ParamTypes.INT, [3, 7])),
        ('kernel_sizes-3', HyperParameter(ParamTypes.INT, [3, 7])),
        ('nb_filters-1', HyperParameter(ParamTypes.INT, [32, 384])),
        ('nb_filters-2', HyperParameter(ParamTypes.INT, [32, 384])),
        ('nb_filters-3', HyperParameter(ParamTypes.INT, [32, 384])),
        ('dilations-1', HyperParameter(ParamTypes.INT, [1, 5])),
        ('dilations-2', HyperParameter(ParamTypes.INT, [1, 5])),
        ('dilations-3', HyperParameter(ParamTypes.INT, [1, 5])),
    ] + deepcopy(generic_params)

    cnn4 = [
        ('kernel_sizes-4', HyperParameter(ParamTypes.INT, [3, 7])),
        ('nb_filters-4', HyperParameter(ParamTypes.INT, [32, 384])),
        ('dilations-4', HyperParameter(ParamTypes.INT, [1, 5])),
    ] + deepcopy(cnn3)

    # RNN Models
    rnn2 = [
        ('recurrent_units-1', HyperParameter(ParamTypes.INT, [16, 512])),
        ('recurrent_units-2', HyperParameter(ParamTypes.INT, [16, 512])),
    ] + deepcopy(generic_params)

    rnn3 = [
        ('recurrent_units-3', HyperParameter(ParamTypes.INT, [16, 512])),
    ] + deepcopy(rnn2)

    self.tuners = {
        'CNN-3': GP(cnn3),
        'CNN-4': GP(cnn4),
        'RNN-2': GP(rnn2),
        'RNN-3': GP(rnn3),
    }
    self.selector = UCB1(list(self.tuners.keys()))
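# Hedged sketch (not in the source) of one selection/tuning step combining the UCB1
# selector with the per-architecture GP tuners built above; `scores` maps each
# architecture name to its list of past scores, and `train_and_score` is a
# hypothetical evaluation callback.
def search_step(selector, tuners, scores, train_and_score):
    choice = selector.select(scores)        # UCB1 picks the architecture to tune next
    tuner = tuners[choice]
    params = tuner.propose()                # next hyperparameter set for that architecture
    score = train_and_score(choice, params)
    tuner.add(params, score)
    scores[choice].append(score)
    return choice, params, score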
def lgb_tune_btb(train_x, train_y, val_x, val_y, n_turn=30, verbose=True):
    from btb.tuning import GP
    from btb import HyperParameter, ParamTypes

    tunables = [
        # ('n_estimators', HyperParameter(ParamTypes.INT, [10, 500])),
        ('num_leaves', HyperParameter(ParamTypes.INT, [28, 64])),
        ("learning_rate", HyperParameter(ParamTypes.FLOAT, [0.01, 0.05])),
        ("colsample_bytree", HyperParameter(ParamTypes.FLOAT, [0.6, 1.0])),
        ("subsample", HyperParameter(ParamTypes.FLOAT, [0.6, 1.0])),
        ("reg_alpha", HyperParameter(ParamTypes.INT, [0, 32])),
        ("reg_lambda", HyperParameter(ParamTypes.INT, [0, 64])),
        ("min_child_weight", HyperParameter(ParamTypes.INT, [1, 32])),
        # ("max_bin", HyperParameter(ParamTypes.INT, [256, 512])),
    ]
    tuner = GP(tunables)

    def tune_lgb(tuner, train_x, train_y, val_x, val_y, n_turn):
        param_ls = []
        score_ls = []
        for i in range(n_turn):
            print("the {}th round".format(i))
            params = tuner.propose()
            params.update({
                "boosting_type": 'gbdt',
                "n_estimators": 4000,
                "n_jobs": -1,
                "objective": 'binary',
                "metric": "auc",
                "max_depth": -1
            })
            d_train = lgb.Dataset(train_x, label=train_y)
            d_test = lgb.Dataset(val_x, label=val_y)
            model = lgb.train(params, d_train, 3000,
                              valid_sets=[d_train, d_test],
                              early_stopping_rounds=100,
                              verbose_eval=200)
            # model = lgb.LGBMClassifier(
            #     boosting_type='gbdt',
            #     n_estimators=4000,
            #     n_jobs=-1,
            #     objective='binary',
            #     min_child_weight=params['min_child_weight'],
            #     verbose=200, eval_metric='auc',
            #     num_leaves=params['num_leaves'],
            #     learning_rate=params["learning_rate"],
            #     reg_alpha=params["reg_alpha"],
            #     reg_lambda=params["reg_lambda"],
            #     subsample=params["subsample"],
            #     colsample_bytree=params["colsample_bytree"])
            # model.fit(train_x, train_y,
            #           eval_set=[(train_x, train_y), (val_x, val_y)],
            #           eval_metric="auc",
            #           early_stopping_rounds=100, verbose=200)
            auc = model.best_score["valid_1"]["auc"]
            best_n_estimator = model.best_iteration
            params.update({"n_estimators": best_n_estimator})
            if verbose:
                print("params:", params)
                print("validation auc:", auc)
            param_ls.append(params)
            score_ls.append(auc)
            tuner.add(params, auc)

            del d_train, d_test, model
            import gc
            gc.collect()

        best_params = param_ls[score_ls.index(max(score_ls))]
        if verbose:
            print("best params:", best_params)
            print("best score:", tuner._best_score)
        return best_params

    return tune_lgb(tuner, train_x, train_y, val_x, val_y, n_turn)
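# Hedged example invocation; assumes lightgbm-compatible train/validation arrays
# named train_x, train_y, val_x, val_y have already been prepared.
best_params = lgb_tune_btb(train_x, train_y, val_x, val_y, n_turn=30, verbose=True)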
X, X_test, y, y_test = train_test_split(
    mnist.data,
    mnist.target,
    train_size=1000,
    test_size=300,
)

# Establish global variables
SELCTOR_NUM_ITER = 5        # we will use the selector 5 times
TUNING_BUDGET_PER_ITER = 3  # we will tune for 3 iterations per round of selection

# initialize the tuners
# parameters of RandomForestClassifier we wish to tune and their ranges
tunables_rf = [
    ('n_estimators', HyperParameter(ParamTypes.INT, [10, 500])),
    ('max_depth', HyperParameter(ParamTypes.INT, [3, 20]))
]

# parameters of SVM we wish to tune and their ranges
tunables_svm = [
    ('c', HyperParameter(ParamTypes.FLOAT_EXP, [0.01, 10.0])),
    ('gamma', HyperParameter(ParamTypes.FLOAT, [0.000000001, 0.0000001]))
]

# Create a GP-based tuner for each set of tunables
rf_tuner = GP(tunables_rf)
svm_tuner = GP(tunables_svm)


# Function to generate proper model given hyperparameters
def gen_rf(params):
    return RandomForestClassifier(
        n_estimators=params['n_estimators'],
def _knob_to_tunable(name, knob_config):
    tunable_type = _KNOB_TYPE_TO_TUNABLE_TYPE[knob_config['type']]
    tunable_range = _KNOB_CONFIG_TO_TUNABLE_RANGE[tunable_type](knob_config)
    return (name, HyperParameter(tunable_type, tunable_range))
        # score the candidate point (x, y) -- always doing maximization!
        score = rosenbrock(**candidate)

        # report the results back to the tuner
        tuner.add(candidate, score)

    print('best score: ', tuner._best_score)
    print('best hyperparameters: ', tuner._best_hyperparams)


# initialize the tunables, i.e. the function inputs x and y
# we make a prior guess that the maximum function value will be found when
# x and y are between -100 and 100
tunables = (
    ('x', HyperParameter('float', [-100, 100])),
    ('y', HyperParameter('float', [-100, 100])),
)

print('Tuning with Uniform tuner')
tuner = btb.tuning.Uniform(tunables)
find_min_with_tuner(tuner)
print()

print('Tuning with GP tuner')
tuner = btb.tuning.GP(tunables)
find_min_with_tuner(tuner)
print()

actual = rosenbrock(1, 1)
print('Actual optimum: ', actual)
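# The rosenbrock() used above is not shown in this excerpt. A plausible definition,
# assuming the example maximizes the negated two-dimensional Rosenbrock function
# (whose true optimum is 0 at x = y = 1), would be:
def rosenbrock(x, y, a=1, b=100):
    return -((a - x) ** 2 + b * (y - x ** 2) ** 2)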
mnist = fetch_mldata('MNIST original')
X, X_test, y, y_test = train_test_split(
    mnist.data,
    mnist.target,
    train_size=1000,
    test_size=300,
)

# Establish global variables
SELCTOR_NUM_ITER = 5        # we will use the selector 5 times
TUNING_BUDGET_PER_ITER = 3  # we will tune for 3 iterations per round of selection

# initialize the tuners
# parameters of RandomForestClassifier we wish to tune and their ranges
tunables_rf = [
    ('n_estimators', HyperParameter(ParamTypes.INT, [10, 500])),
    ('max_depth', HyperParameter(ParamTypes.INT, [3, 20]))
]

# parameters of SVM we wish to tune and their ranges
tunables_svm = [
    ('c', HyperParameter(ParamTypes.FLOAT_EXP, [0.01, 10.0])),
    ('gamma', HyperParameter(ParamTypes.FLOAT, [0.000000001, 0.0000001]))
]

# Create a GP-based tuner for each set of tunables
rf_tuner = GP(tunables_rf)
svm_tuner = GP(tunables_svm)


# Function to generate proper model given hyperparameters
def gen_rf(params):
    return RandomForestClassifier(
        n_estimators=params['n_estimators'],
        max_depth=params['max_depth'],
def _knob_to_tunable(knob):
    if isinstance(knob, CategoricalKnob):
        if knob.value_type is int:
            return HyperParameter(ParamTypes.INT_CAT, knob.values)
        elif knob.value_type is float:
            return HyperParameter(ParamTypes.FLOAT_CAT, knob.values)
        elif knob.value_type is str:
            return HyperParameter(ParamTypes.STRING, knob.values)
        elif knob.value_type is bool:
            return HyperParameter(ParamTypes.BOOL, knob.values)
    elif isinstance(knob, FixedKnob):
        if knob.value_type is int:
            return HyperParameter(ParamTypes.INT_CAT, [knob.value])
        elif knob.value_type is float:
            return HyperParameter(ParamTypes.FLOAT_CAT, [knob.value])
        elif knob.value_type is str:
            return HyperParameter(ParamTypes.STRING, [knob.value])
        elif knob.value_type is bool:
            return HyperParameter(ParamTypes.BOOL, [knob.value])
    elif isinstance(knob, IntegerKnob):
        if knob.is_exp:
            return HyperParameter(ParamTypes.INT_EXP, [knob.value_min, knob.value_max])
        else:
            return HyperParameter(ParamTypes.INT, [knob.value_min, knob.value_max])
    elif isinstance(knob, FloatKnob):
        if knob.is_exp:
            return HyperParameter(ParamTypes.FLOAT_EXP, [knob.value_min, knob.value_max])
        else:
            return HyperParameter(ParamTypes.FLOAT, [knob.value_min, knob.value_max])
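# Hedged usage sketch (not in the source): turning a mapping of knob names to knob
# objects into the (name, HyperParameter) pairs a BTB tuner expects; `knob_config`
# is a hypothetical input.
def _knob_config_to_tunables(knob_config):
    return [(name, _knob_to_tunable(knob)) for name, knob in knob_config.items()]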
    for i in range(100):
        # use tuner to get next set of (x, y) to try
        xy_to_try = tuner.propose()
        score = rosenbrok(xy_to_try['x'], xy_to_try['y'])
        tuner.add(xy_to_try, -1 * score)

    print("minimum score:", tuner._best_score)
    print("minimum hyperparameters:", tuner._best_hyperparams)
    print(
        "minimum score x:", tuner._best_hyperparams['x'],
        "minimum score y:", tuner._best_hyperparams['y'],
    )


# initialize the tunables, i.e. the function inputs x and y
# we make a prior guess that the minimum function value will be found when
# x and y are between -100 and 1000
x = HyperParameter(ParamTypes.INT, [-100, 1000])
y = HyperParameter(ParamTypes.INT, [-100, 1000])

print("------------Minimum found with uniform tuner--------------")
tuner = Uniform([("x", x), ("y", y)])
find_min_with_tuner(tuner)

print("------------Minimum found with GP tuner--------------")
tuner = GP([("x", x), ("y", y)])
find_min_with_tuner(tuner)
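# The rosenbrok() helper is not included in this excerpt. A plausible definition,
# assuming the standard two-dimensional Rosenbrock function (global minimum 0 at
# x = y = 1, which is why the score is negated before being added to the tuner):
def rosenbrok(x, y, a=1, b=100):
    return (a - x) ** 2 + b * (y - x ** 2) ** 2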