def test_list_encoded_decoded():
    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)

    sample = h.sample()
    encoded = h.encode(sample)
    encoding = [0., 3., 9.1455681, 1.]

    assert np.allclose(encoded, encoding, rtol=1e-5)

    decoded = h.decode(encoded)
    sample_ = sample[:3]
    decoded_ = decoded[:3]

    assert np.allclose(decoded_, sample_, rtol=1e-5)

    # Multi parameter tests
    params = get_multi_parameter_list()
    h = hp.HyperParameterList(params)

    sample = h.sample()
    encoded = h.encode(sample)
    encoding = [1., 1., 2., 0., 3.,
                9.675319, 9.89098828, 8.15032456, 9.37517511, 8.58668476,
                0., 1., 1., 1.]

    assert np.allclose(encoded, encoding, rtol=1e-5)

    decoded = h.decode(encoded)
    sample_ = sample[:10]
    decoded_ = decoded[:10]

    assert np.allclose(decoded_, sample_, rtol=1e-5)
def test_list_encoded_decoded():
    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params, seed=0)

    sample = h.sample()
    encoded = h.encode(sample)
    encoding = [0., 1., 8.30798471, 0.]

    assert np.allclose(encoded, encoding, rtol=1e-5)

    decoded = h.decode(encoded)
    sample_ = sample[:3]
    decoded_ = decoded[:3]

    assert np.allclose(decoded_, sample_, rtol=1e-5)

    # Multi parameter tests
    params = get_multi_parameter_list()
    h = hp.HyperParameterList(params, seed=0)

    sample = h.sample()
    encoded = h.encode(sample)
    encoding = [0., 1., 1., 3., 0.,
                8.30798471, 7.0777787, 8.64898743, 8.30596718, 8.26110341,
                0., 0., 1., 1.]

    print(encoded)
    assert np.allclose(encoded, encoding, rtol=1e-5)

    decoded = h.decode(encoded)
    sample_ = sample[:10]
    decoded_ = decoded[:10]

    assert np.allclose(decoded_, sample_, rtol=1e-5)
def test_list():
    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)

    assert h.name == 'parameter_list'
    assert h.num_choices == 4
    assert repr(h)

    list_names = h.get_parameter_names()

    for param in params:
        assert param.name in list_names
        assert h.param2id[param.name] is not None

    # Multi Parameter tests
    params = get_multi_parameter_list()
    h = hp.HyperParameterList(params)

    assert h.name == 'parameter_list'
    assert h.num_choices == 4
    assert repr(h)

    list_names = h.get_parameter_names()

    for param in params:
        assert h.param2id[param.name] is not None

        for i in range(param.sample_count):
            param_name = param.name + '_%d' % (i + 1)
            assert param_name in list_names
def test_list_remove():
    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)

    # remove by parameter class
    h.remove_hyper_parameter(params[0])

    assert h.num_choices == 3
    assert params[0].name not in h.name_map.values()
    assert params[0].name not in h.param2id

    for param in params[1:]:
        assert param.name in h.name_map.values()
        assert h.param2id[param.name] is not None

    # remove by string name
    h.remove_hyper_parameter('h2')

    assert h.num_choices == 2
    assert params[1].name not in h.name_map.values()
    assert params[1].name not in h.param2id

    assert params[2].name in h.name_map.values()
    assert h.param2id[params[2].name] is not None

    with pytest.raises(KeyError):
        h.remove_hyper_parameter('h5')

    with pytest.raises(ValueError):
        h.remove_hyper_parameter(None)

    # Multi parameter tests
    params = get_multi_parameter_list()
    h = hp.HyperParameterList(params)

    # remove by parameter class
    h.remove_hyper_parameter(params[0])

    assert h.num_choices == 3
    assert params[0].name not in h.name_map.values()
    assert params[0].name not in h.param2id

    for param in params[1:]:
        assert param.name in h.name_map.values()
        assert h.param2id[param.name] is not None

    # remove by string name
    h.remove_hyper_parameter('h2')

    assert h.num_choices == 2
    assert params[1].name not in h.name_map.values()
    assert params[1].name not in h.param2id

    assert params[2].name in h.name_map.values()
    assert h.param2id[params[2].name] is not None

    with pytest.raises(KeyError):
        h.remove_hyper_parameter('h5')

    with pytest.raises(ValueError):
        h.remove_hyper_parameter(None)
def test_list_sample():
    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)

    sample = h.sample()
    assert len(sample) == 4

    # Multi parameter tests
    params = get_multi_parameter_list()
    h = hp.HyperParameterList(params)

    sample = h.sample()
    assert len(sample) == 14
def test_evaluate_train_evaluate():
    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)

    dataset = data.Dataset(h)

    # models
    clfs = []

    # fit samples
    num_samples = 16

    for i in range(3):
        samples = [h.sample() for _ in range(num_samples)]
        labels = [np.sum(sample) for sample in samples]
        x, y = samples, labels
        x, y = dataset.encode_dataset(x, y)

        model = xgb_utils.train_single_model(x, y)
        clfs.append(model)

    # test samples
    num_samples = 100
    samples = [h.sample() for _ in range(num_samples)]

    ex2, _ = dataset.encode_dataset(samples, None)
    preds = xgb_utils.evaluate_models(ex2, clfs)
    count = np.sum(preds)

    print(count)
    assert preds.shape == (num_samples,)
    assert count > 0
def test_evaluate_single_sample():
    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)

    dataset = data.Dataset(h)

    # models
    clfs = []

    # fit samples
    num_samples = 16

    for i in range(3):
        samples = [h.sample() for _ in range(num_samples)]
        labels = [np.sum(sample) for sample in samples]
        x, y = samples, labels
        x, y = dataset.encode_dataset(x, y)

        model = xgb_utils.train_single_model(x, y)
        clfs.append(model)

    # single sample test
    sample = h.sample()
    ex2, _ = dataset.encode_dataset([sample])

    assert ex2.shape == (1, 3)

    pred = xgb_utils.evaluate_models(ex2, clfs)
    assert pred.shape == (1,)
def test_csvwriter_fit():
    total_budget = 50
    batch_size = 5
    objective = 'max'

    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    assert shac.total_classifiers == min(max(batch_size - 1, 1), 18)
    assert shac._per_classifier_budget == 10
    assert shac.num_workers == 10
    assert len(shac.classifiers) == 0
    assert len(shac.dataset) == 0

    # do sequential work for debugging
    shac.num_parallel_generators = 2
    shac.num_parallel_evaluators = 2

    print("Evaluating before training")
    np.random.seed(0)

    # Create the callbacks
    callback = cb.CSVLogger('shac/logs.csv', append=True)

    # training
    shac.fit(evaluation_simple, callbacks=[callback])

    assert os.path.exists('shac/logs.csv')
def run_shac_hartmann6():
    total_budget = 200
    num_batches = 20
    objective = 'min'

    params = get_hartmann6_hyperparameter_list()
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=num_batches, objective=objective)

    # do parallel work for fast processing
    shac.num_parallel_generators = 8
    shac.num_parallel_evaluators = 1

    print()

    # training
    if os.path.exists('shac/'):
        shac.restore_data()

    shac.fit(evaluation_hartmann6, skip_cv_checks=True)

    print()
    print("Evaluating after training")

    predictions = shac.predict(num_batches=1, num_workers_per_batch=1)
    pred_evals = [evaluation_hartmann6(0, pred) for pred in predictions]
    pred_mean = np.mean(pred_evals)

    print()
    print("Predicted mean : ", pred_mean)
def test_shac_simple_early_stop():
    total_budget = 100
    batch_size = 20
    objective = 'max'

    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)

    shac = engine.KerasSHAC(h, total_budget=total_budget,
                            max_gpu_evaluators=0,
                            num_batches=batch_size, objective=objective)

    assert shac.total_classifiers == min(max(batch_size - 1, 1), 18)
    assert shac._per_classifier_budget == 5
    assert shac.num_workers == 5
    assert len(shac.classifiers) == 0
    assert len(shac.dataset) == 0

    # do sequential work for debugging
    shac.num_parallel_generators = 1
    shac.num_parallel_evaluators = 1

    # training (with failure)
    shac.fit(evaluation_simple_keras_tf, early_stop=True, skip_cv_checks=True)

    assert len(shac.classifiers) == 0
def test_csvwriter_fit_dataset():
    total_budget = 1000
    batch_size = 5
    objective = 'max'

    params = [hp.UniformHP('x', -1., 1.), hp.NormalHP('y', 0., 5.)]
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    # create the mock dataset
    create_mock_dataset()

    print("Evaluating before training")
    np.random.seed(0)

    # Create the callbacks
    callback = cb.CSVLogger('shac/logs.csv')

    # training
    shac.fit_dataset('shac/mock.csv', callbacks=[callback])

    assert os.path.exists('shac/logs.csv')
def test_dataset_multi_get_best_parameters():
    params = get_multi_parameter_list()
    h = hp.HyperParameterList(params)
    dataset = data.Dataset(h)

    with pytest.raises(ValueError):
        dataset.get_best_parameters(None)

    # Test with empty dataset
    assert dataset.get_best_parameters() is None

    samples = [(h.sample(), np.random.uniform()) for _ in range(5)]

    for sample in samples:
        dataset.add_sample(*sample)

    objective_values = [v for h, v in samples]
    min_index = np.argmin(objective_values)
    max_index = np.argmax(objective_values)

    max_hp = data.flatten_parameters(
        dataset.get_best_parameters(objective='max'))
    min_hp = data.flatten_parameters(
        dataset.get_best_parameters(objective='min'))

    assert max_hp == samples[max_index][0]
    assert min_hp == samples[min_index][0]
def test_serialization_deserialization():
    basepath = 'shac'

    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)

    dataset = data.Dataset(h)

    # models
    clfs = []

    # fit samples
    num_samples = 16

    for i in range(3):
        samples = [h.sample() for _ in range(num_samples)]
        labels = [np.sum(sample) for sample in samples]
        x, y = samples, labels
        x, y = dataset.encode_dataset(x, y)

        model = xgb_utils.train_single_model(x, y)
        clfs.append(model)

    xgb_utils.save_classifiers(clfs, basepath)
    assert os.path.exists(os.path.join(basepath, 'classifiers', 'classifiers.pkl'))

    models = xgb_utils.restore_classifiers(basepath)
    assert len(models) == len(clfs)

    with pytest.raises(FileNotFoundError):
        models = xgb_utils.restore_classifiers('none')
def test_multi_set_dataset():
    params = get_multi_parameter_list()
    h = hp.HyperParameterList(params)
    dataset = data.Dataset(h)

    # numpy arrays
    samples = [(np.array(h.sample()), np.random.uniform()) for _ in range(5)]
    x, y = zip(*samples)
    x = np.array(x)
    y = np.array(y)

    dataset.set_dataset(x, y)
    assert len(dataset) == 5

    dataset.clear()

    # python arrays
    samples = [(h.sample(), float(np.random.uniform())) for _ in range(5)]
    x, y = zip(*samples)

    dataset.set_dataset(x, y)
    assert len(dataset) == 5

    # None data
    with pytest.raises(TypeError):
        dataset.set_dataset(None, int(6))

    with pytest.raises(TypeError):
        dataset.set_dataset([1, 2, 3], None)

    with pytest.raises(TypeError):
        dataset.set_dataset(None, None)
def test_dataset_basedir_custom():
    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)
    dataset = data.Dataset(h, basedir='custom')

    assert os.path.exists(dataset.basedir)
    assert not os.path.exists('shac')
def test_history_fit():
    total_budget = 50
    batch_size = 5
    objective = 'max'

    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    assert shac.total_classifiers == min(max(batch_size - 1, 1), 18)
    assert shac._per_classifier_budget == 10
    assert shac.num_workers == 10
    assert len(shac.classifiers) == 0
    assert len(shac.dataset) == 0

    # do sequential work for debugging
    shac.num_parallel_generators = 2
    shac.num_parallel_evaluators = 2

    print("Evaluating before training")
    np.random.seed(0)

    # Create the callbacks
    history = cb.History()

    # training
    history = shac.fit(evaluation_simple, callbacks=[history])

    assert isinstance(history, cb.History)
    assert 'begin_run_index' in history.history
    assert 'model' in history.history
    assert 'parameters' in history.history
    assert 'evaluations' in history.history
    assert 'per_classifier_budget' in history.history
    assert 'generator_threads' in history.history
    assert 'device_ids' in history.history

    # Test passing in empty callback list
    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    # training
    history = shac.fit(evaluation_simple)

    assert isinstance(history, cb.History)
    assert 'begin_run_index' in history.history
    assert 'model' in history.history
    assert 'parameters' in history.history
    assert 'evaluations' in history.history
    assert 'per_classifier_budget' in history.history
    assert 'generator_threads' in history.history
    assert 'device_ids' in history.history
def test_list_empty():
    h = hp.HyperParameterList()

    assert h.name == 'parameter_list'
    assert h.num_choices == 0
    assert len(h.id2param) == 0
    assert len(h.param2id) == 0
    assert len(h.name_map) == 0
def test_list_serialization_deserialization():
    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params, seed=0)

    config = h.get_config()
    assert len(config) == len(h.name_map)

    cnames_config = config.values()
    for cls_name, cls_cfg in cnames_config:
        cls = hp.get_parameter(cls_name)
        assert cls.load_from_config(cls_cfg)

    h = hp.HyperParameterList.load_from_config(config)
    assert len(config) == len(h.name_map)

    cnames_config = list(config.values())
    for cname_cfg in cnames_config:
        cls_name, cls_cfg = cname_cfg
        cls = hp.get_parameter(cls_name)
        assert cls.load_from_config(cls_cfg)

    # Multi parameter tests
    params = get_multi_parameter_list()
    h = hp.HyperParameterList(params)

    config = h.get_config()
    assert len(config) == len(h.name_map)

    cnames_config = config.values()
    for cls_name, cls_cfg in cnames_config:
        cls = hp.get_parameter(cls_name)
        assert cls.load_from_config(cls_cfg)

    h = hp.HyperParameterList.load_from_config(config)
    assert len(config) == len(h.name_map)

    cnames_config = list(config.values())
    for cname_cfg in cnames_config:
        cls_name, cls_cfg = cname_cfg
        cls = hp.get_parameter(cls_name)
        assert cls.load_from_config(cls_cfg)
def test_dataset_parameters():
    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)
    dataset = data.Dataset(h)

    assert len(params) == len(dataset.parameters)

    dataset.parameters = params
    assert len(params) == len(dataset.parameters)
def test_shac_simple_torch_custom_basepath():
    total_budget = 50
    batch_size = 5
    objective = 'max'

    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)

    shac = torch_engine.TorchSHAC(h, total_budget=total_budget,
                                  max_gpu_evaluators=0,
                                  num_batches=batch_size, objective=objective,
                                  max_cpu_evaluators=1, save_dir='custom')

    assert shac.total_classifiers == min(max(batch_size - 1, 1), 18)
    assert shac._per_classifier_budget == 10
    assert shac.num_workers == 10
    assert len(shac.classifiers) == 0
    assert len(shac.dataset) == 0

    # do sequential work for debugging
    shac.num_parallel_generators = 1
    shac.num_parallel_evaluators = 1
    shac.generator_backend = 'loky'

    # training
    shac.fit(evaluation_simple)

    assert len(shac.classifiers) <= shac.total_classifiers
    assert os.path.exists('custom/datasets/dataset.csv')
    assert os.path.exists('custom/classifiers/classifiers.pkl')

    # Serialization
    shac.save_data()

    # Restore with different batchsize
    shac2 = torch_engine.TorchSHAC(None, total_budget=total_budget,
                                   max_gpu_evaluators=1, num_batches=10,
                                   objective=objective, max_cpu_evaluators=2,
                                   save_dir='custom')

    assert shac2.limit_memory is True

    shac2.restore_data()

    # test no file found, yet no error
    shutil.rmtree('custom/')
    shac2.classifiers = None
    shac2.dataset = None
    shac2.restore_data()
def test_list_sample_seeded():
    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params, seed=0)

    sample = h.sample()
    assert len(sample) == 4
    assert sample == [0, 4, 8.307984706426012, 'v1']

    # Multi parameter tests
    params = get_multi_parameter_list()
    h = hp.HyperParameterList(params, seed=0)

    sample = h.sample()
    assert len(sample) == 14
    assert sample == [0, 1, 4, 6, 3,
                      8.307984706426012, 7.077778695483674, 8.648987433636128,
                      8.30596717785483, 8.261103406262468,
                      'v1', 'v1', 'v2', 'v2']
def test_list_add():
    params = get_hyperparameter_list()
    h = hp.HyperParameterList()

    for param in params:
        h.add_hyper_parameter(param)

    assert h.name == 'parameter_list'
    assert h.num_choices == 4

    for param in params:
        assert param.name in h.name_map.values()
        assert h.param2id[param.name] is not None

    # add a parameter whose name already exists in name map
    with pytest.raises(ValueError):
        h.add_hyper_parameter(params[0])

    # add a null parameter
    with pytest.raises(ValueError):
        h.add_hyper_parameter(None)

    # Multi parameter tests
    params = get_multi_parameter_list()
    h = hp.HyperParameterList()

    for param in params:
        h.add_hyper_parameter(param)

    assert h.name == 'parameter_list'
    assert h.num_choices == 4

    for param in params:
        assert param.name in h.name_map.values()
        assert h.param2id[param.name] is not None

    # add a parameter whose name already exists in name map
    with pytest.raises(ValueError):
        h.add_hyper_parameter(params[0])

    # add a null parameter
    with pytest.raises(ValueError):
        h.add_hyper_parameter(None)
def __init__(self, hyperparameter_list, total_budget, num_batches,
             objective='max', max_classifiers=18):

    if total_budget % num_batches != 0:
        raise ValueError("Total budget must be divisible by the number of batches!")

    if hyperparameter_list is not None and (
            not isinstance(hyperparameter_list, hp.HyperParameterList)):
        hyperparameter_list = hp.HyperParameterList(hyperparameter_list)

    print("Number of workers possible : %d" % (total_budget // num_batches))

    self.parameters = hyperparameter_list
    self.objective = objective

    self._total_budget = total_budget  # N
    self.num_batches = num_batches  # M
    self._max_classifiers = max_classifiers

    self._num_workers = self.total_budget // num_batches  # W
    self._total_classifiers = min(max(num_batches - 1, 1), max_classifiers)  # K

    # serializable
    self.dataset = data.Dataset(hyperparameter_list)
    self.classifiers = []  # list of xgb.XGBClassifier

    # training variables
    self._dataset_index = 0

    self._per_classifier_budget = int(self.num_workers * np.floor(
        total_budget / (float(self.num_workers * (self.total_classifiers + 1)))))  # Tc

    print("Using %d parallel workers, it will require %d epochs to fit %d classifiers.\n"
          "Each classifier will be provided %d samples to train per epoch." % (
              self.num_workers,
              total_budget // self.num_workers,
              self._total_classifiers,
              self._per_classifier_budget,
          ))

    # Compute how many threads and processes will be used
    self._compute_parallelism()

    # serialization paths
    self._prepare_dirs()
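# Worked example (illustrative sketch only, not part of the engine): the budget
# arithmetic from the constructor above, using the values exercised by the
# engine tests in this section (total_budget=50, num_batches=5,
# max_classifiers=18).
import numpy as np

total_budget, num_batches, max_classifiers = 50, 5, 18

num_workers = total_budget // num_batches                           # W = 10
total_classifiers = min(max(num_batches - 1, 1), max_classifiers)   # K = 4
per_classifier_budget = int(num_workers * np.floor(
    total_budget / float(num_workers * (total_classifiers + 1))))   # Tc = 10

# Matches the assertions in test_csvwriter_fit / test_history_fit.
assert (num_workers, total_classifiers, per_classifier_budget) == (10, 4, 10)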
def set_parameters(self, parameters):
    """
    Sets the hyper parameter list manager

    # Arguments:
        parameters (hp.HyperParameterList | list): a Hyper Parameter List
            or a python list of Hyper Parameters.
    """
    if not isinstance(parameters, hp.HyperParameterList):
        parameters = hp.HyperParameterList(parameters)

    self._parameters = parameters
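# Illustrative usage sketch of `set_parameters` (a hedged example: it mirrors
# what test_dataset_multi_param_list below exercises; `data.Dataset` and the
# helper `get_multi_parameter_list()` are assumed from the surrounding test
# module).
params = get_multi_parameter_list()
dataset = data.Dataset(params)

dataset.set_parameters(params)                          # plain list is wrapped
dataset.set_parameters(hp.HyperParameterList(params))   # stored as-is
assert isinstance(dataset._parameters, hp.HyperParameterList)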
def test_dataset_multi_param_list():
    params = get_multi_parameter_list()
    dataset = data.Dataset(params)

    assert isinstance(dataset._parameters, hp.HyperParameterList)

    dataset.set_parameters(params)
    assert isinstance(dataset._parameters, hp.HyperParameterList)

    h = hp.HyperParameterList(params)
    dataset.set_parameters(h)
    assert isinstance(dataset._parameters, hp.HyperParameterList)
def test_list_encoded_decoded_numpy():
    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params, seed=0)

    sample = np.array(h.sample())
    encoded = h.encode(sample)
    encoding = [0., 1., 8.30798471, 0.]

    assert np.allclose(encoded, encoding, rtol=1e-5)

    decoded = np.array(h.decode(encoded))
    decoded_ = decoded[:3].astype('float')
    sample_ = sample[:3].astype('float')

    assert np.allclose(decoded_, sample_, rtol=1e-5)

    sample = np.array([h.sample()])

    with pytest.raises(ValueError):
        h.encode(sample)
        h.decode(sample)

    # Multi parameter tests
    params = get_multi_parameter_list()
    h = hp.HyperParameterList(params, seed=0)

    sample = np.array(h.sample())
    encoded = h.encode(sample)
    encoding = [0., 1., 1., 3., 0.,
                8.30798471, 7.0777787, 8.64898743, 8.30596718, 8.26110341,
                0., 0., 1., 1.]

    assert np.allclose(encoded, encoding, rtol=1e-5)

    decoded = np.array(h.decode(encoded))
    decoded_ = decoded[:10].astype('float')
    sample_ = sample[:10].astype('float')

    assert np.allclose(decoded_, sample_, rtol=1e-5)

    sample = np.array([h.sample()])

    with pytest.raises(ValueError):
        h.encode(sample)
        h.decode(sample)
def test_list_encoded_decoded_numpy():
    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)

    sample = np.array(h.sample())
    encoded = h.encode(sample)
    encoding = [0., 3., 9.1455681, 1.]

    assert np.allclose(encoded, encoding, rtol=1e-5)

    decoded = np.array(h.decode(encoded))
    decoded_ = decoded[:3].astype('float')
    sample_ = sample[:3].astype('float')

    assert np.allclose(decoded_, sample_, rtol=1e-5)

    sample = np.array([h.sample()])

    with pytest.raises(ValueError):
        h.encode(sample)
        h.decode(sample)

    # Multi parameter tests
    params = get_multi_parameter_list()
    h = hp.HyperParameterList(params)

    sample = np.array(h.sample())
    encoded = h.encode(sample)
    encoding = [2., 0., 0., 0., 2.,
                8.43299535, 9.43650619, 8.43993152, 8.17835439, 9.50823629,
                0., 1., 0., 1.]

    assert np.allclose(encoded, encoding, rtol=1e-5)

    decoded = np.array(h.decode(encoded))
    decoded_ = decoded[:10].astype('float')
    sample_ = sample[:10].astype('float')

    assert np.allclose(decoded_, sample_, rtol=1e-5)

    sample = np.array([h.sample()])

    with pytest.raises(ValueError):
        h.encode(sample)
        h.decode(sample)
def test_shac_initialization():
    total_budget = 50
    batch_size = 5
    objective = 'max'

    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)

    # direct params list submission
    shac = engine.SHAC(params, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    # submission of HyperParameterList
    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    # default number of parallel executors
    shac.set_num_parallel_generators(None)
    shac.set_num_parallel_evaluators(None)

    shac.concurrent_evaluators()
    shac.parallel_evaluators()

    assert shac.generator_backend == 'loky'
    assert shac.evaluator_backend == 'loky'

    shac.num_parallel_generators = 20
    assert shac.num_parallel_generators == 20

    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    with pytest.raises(ValueError):
        shac.generator_backend = 'random'

    with pytest.raises(ValueError):
        shac.evaluator_backend = 'random'

    shac = engine.SHAC(None, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    # No parameters
    with pytest.raises(RuntimeError):
        shac.predict()
def test_shac_simple_multiparameter():
    total_budget = 50
    batch_size = 5
    objective = 'max'

    params = get_multi_parameter_list()
    h = hp.HyperParameterList(params)

    shac = engine.SHAC(h, total_budget=total_budget,
                       num_batches=batch_size, objective=objective)

    assert shac.total_classifiers == min(max(batch_size - 1, 1), 18)
    assert shac._per_classifier_budget == 10
    assert shac.num_workers == 10
    assert len(shac.classifiers) == 0
    assert len(shac.dataset) == 0

    # do sequential work for debugging
    shac.num_parallel_generators = 2
    shac.num_parallel_evaluators = 2

    # training
    shac.fit(evaluation_simple_multi)

    assert len(shac.classifiers) <= shac.total_classifiers
    assert os.path.exists('shac/datasets/dataset.csv')
    assert os.path.exists('shac/classifiers/classifiers.pkl')

    print()
    print("Evaluating after training")
    np.random.seed(0)

    # Serialization
    shac.save_data()

    # Restore with different batchsize
    shac2 = engine.SHAC(None, total_budget=total_budget,
                        num_batches=10, objective=objective)

    shac2.restore_data()

    np.random.seed(0)

    # test no file found, yet no error
    shutil.rmtree('shac/')
    shac2.dataset = None
    shac2.classifiers = None
    shac2.restore_data()
def __init__(self, parameter_list=None):
    if not isinstance(parameter_list, hp.HyperParameterList):
        if isinstance(parameter_list, (list, tuple)):
            parameter_list = hp.HyperParameterList(parameter_list)

    self._parameters = parameter_list
    self.X = []
    self.Y = []
    self.size = 0

    self.basedir = 'shac'
    self._prepare_dir()