def test_fit_model():
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'loguniform(lower=1, upper=10)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    num_replicates = 10
    fidelity = Fidelity(1, 1, name='epoch').to_dict()
    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(
        list(range(num_experiments)), [hpo], budget=surrogate_budget,
        fidelity=fidelity, search_space=space, namespace=NAMESPACE,
        defaults=defaults)

    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)

    X, y = convert_data_to_xy(data, space, objective)

    # Smoke test: passes as long as fitting completes without raising.
    fit_model(X, y, space, seed=1)


def test_generate_biased_replicates_last_epoch():
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    num_replicates = 10
    fidelity = Fidelity(1, 1, name='epoch').to_dict()
    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(
        list(range(num_experiments)), [hpo], budget=surrogate_budget,
        fidelity=fidelity, search_space=space, namespace=NAMESPACE,
        defaults=defaults)

    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)

    replicates = generate_biased_replicates(
        data, configs['random_search'][f'{NAMESPACE}-random-search-s-0'],
        variables, objective, num_replicates, hpo_budget,
        early_stopping=False)

    best_trial_index = 6
    rng = numpy.random.RandomState(
        configs['random_search'][f'{NAMESPACE}-random-search-s-0']['seed'])
    for replicate in replicates:
        should_be = copy.deepcopy(defaults)
        # Hyperparameters should all come from the best trial in `data`.
        for param in space.keys():
            assert replicate[param] == float(
                data.sel(order=best_trial_index)[param].values)
            should_be[param] = replicate[param]
        # Sources of variation should be resampled from the HPO's seed.
        for variable in variables:
            assert replicate[variable] == rng.randint(2**30)
            should_be[variable] = replicate[variable]
        assert replicate['uid'] == compute_identity(should_be, IDENTITY_SIZE)


def test_generate_simulated_fix():
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'loguniform(lower=1, upper=10)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    num_replicates = 10
    fidelity = Fidelity(1, 1, name='epoch').to_dict()
    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(
        list(range(num_experiments)), [hpo], budget=surrogate_budget,
        fidelity=fidelity, search_space=space, namespace=NAMESPACE,
        defaults=defaults)

    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)

    config = configs['random_search'][f'{NAMESPACE}-random-search-s-0']

    # Make sure the defaults have been replaced by randomized seeds
    assert config['defaults']['d'] != defaults['d']
    assert config['defaults']['e'] != defaults['e']

    replicates = generate_simulated_fix(
        data, config, variables, objective, hpo_budget, num_replicates,
        early_stopping=False)

    assert len(replicates) == num_replicates
    for i in range(1, num_replicates):
        assert replicates[i]['a'] != replicates[0]['a']
        assert replicates[i]['b'] != replicates[0]['b']
        assert replicates[i]['c'] != replicates[0]['c']
        assert replicates[i]['uid'] != replicates[0]['uid']
        assert replicates[i]['d'] == config['defaults']['d']
        assert replicates[i]['e'] == config['defaults']['e']


def generate_mocked_replicates(num_replicates, num_experiments=5):
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)'
    }
    variables = {'d': 3}
    defaults = {'d': 1, 'e': 2, 'epoch': 5}
    seed = 2
    hpo = 'random_search'
    objective = 'obj'
    fidelity = Fidelity(5, 5, name='epoch').to_dict()
    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(
        list(range(num_experiments)), [hpo], budget=surrogate_budget,
        fidelity=fidelity, search_space=space, namespace=NAMESPACE,
        defaults=defaults)

    to_replicate = get_configs_to_replicate(configs, num_experiments)

    randomize_seeds(configs['random_search'], variables, seed)

    hpos_ready = dict(random_search=[])
    data = dict(random_search=dict())
    for hpo_namespace, config in configs['random_search'].items():
        hpos_ready['random_search'].append(hpo_namespace)
        data['random_search'][hpo_namespace] = build_data(
            surrogate_budget, variables, defaults, space)

    ready_configs = get_ready_configs(hpos_ready, configs, to_replicate)

    return generate_replicates(
        ready_configs, data, variables, objective, hpo_budget,
        num_replicates, early_stopping=False)


def test_simulate_hpo():
    # fit a model
    # simulate
    # test what? ...
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'loguniform(lower=1, upper=10)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    num_replicates = 10
    fidelity = Fidelity(1, 1, name='epoch').to_dict()
    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(
        list(range(num_experiments)), [hpo], budget=surrogate_budget,
        fidelity=fidelity, search_space=space, namespace=NAMESPACE,
        defaults=defaults)

    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)

    X, y = convert_data_to_xy(data, space, objective)

    model = fit_model(X, y, space, seed=1)

    sample = simulate_hpo(
        model, space, hpo_budget,
        configs['random_search'][f'{NAMESPACE}-random-search-s-0']['seed'])

    assert sample.keys() == space.keys()


def test_randomize_seeds():
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)'
    }
    variables = ['d', 'e']
    defaults = {}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    fidelity = Fidelity(1, 1, name='epoch').to_dict()

    configs = generate_hpos(
        list(range(num_experiments)), [hpo], budget=200,
        fidelity=fidelity, search_space=space, namespace=NAMESPACE,
        defaults=defaults)

    # With compute_id=True, a uid matching the randomized defaults is attached.
    randomize_seeds(configs['random_search'], variables, seed, compute_id=True)
    rng = numpy.random.RandomState(seed)
    for config in configs['random_search'].values():
        for variable in variables:
            assert config['defaults'][variable] == rng.randint(2**30)
        uid = config['defaults'].pop('uid')
        assert uid == compute_identity(config['defaults'], IDENTITY_SIZE)

    # With compute_id=False, seeds are still randomized but no uid is added.
    randomize_seeds(configs['random_search'], variables, seed, compute_id=False)
    rng = numpy.random.RandomState(seed)
    for config in configs['random_search'].values():
        for variable in variables:
            assert config['defaults'][variable] == rng.randint(2**30)
        assert 'uid' not in config['defaults']


def test_convert_data_to_xy():
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'loguniform(lower=1, upper=10)'
    }
    variables = {'d': 2, 'e': 1}
    defaults = {'d': 1, 'e': 2}
    seed = 2
    num_experiments = 5
    hpo = 'random_search'
    objective = 'obj'
    num_replicates = 10
    fidelity = Fidelity(1, 1, name='epoch').to_dict()
    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(
        list(range(num_experiments)), [hpo], budget=surrogate_budget,
        fidelity=fidelity, search_space=space, namespace=NAMESPACE,
        defaults=defaults)

    randomize_seeds(configs['random_search'], variables, seed)

    data = build_data(surrogate_budget, variables, defaults, space)

    X, y = convert_data_to_xy(data, space, objective, early_stopping=False)

    assert numpy.array_equal(X[:, 0], data['a'].values.reshape(-1))
    assert numpy.array_equal(X[:, 1], data['b'].values.reshape(-1))
    # 'c' is log-uniform, so it is log-transformed in the design matrix.
    assert numpy.array_equal(X[:, 2], numpy.log(data['c'].values.reshape(-1)))
    # Without early stopping, the target is the objective at the last epoch.
    assert numpy.array_equal(
        y, data[objective].isel(epoch=-1).values.reshape(-1))
    assert y.shape == (surrogate_budget, )


def test_consolidate_results(client):
    num_experiments = 5
    num_replicates = 10
    space = {
        'a': 'uniform(lower=-1, upper=1)',
        'b': 'uniform(lower=-1, upper=1)',
        'c': 'uniform(lower=-1, upper=1)'
    }
    variables = {'d': 5}
    defaults = {'e': 2, 'epoch': 5}
    seed = 2
    hpo = 'random_search'
    objective = 'obj'
    fidelity = Fidelity(5, 5, name='epoch').to_dict()
    surrogate_budget = 10
    hpo_budget = 5

    configs = generate_hpos(
        list(range(num_experiments)), [hpo], budget=surrogate_budget,
        fidelity=fidelity, search_space=space, namespace=NAMESPACE,
        defaults=defaults)

    to_replicate = get_configs_to_replicate(configs, num_experiments)

    reset_pool_size(configs['random_search'])
    randomize_seeds(configs['random_search'], variables, seed)

    variable_names = list(sorted(variables.keys()))

    hpo_stats = fetch_all_hpo_stats(client, NAMESPACE)

    namespaces = register_hpos(
        client, NAMESPACE, foo, configs, defaults, hpo_stats)

    worker = TrialWorker(URI, DATABASE, 0, None)
    worker.max_retry = 0
    worker.timeout = 0.02
    worker.run()

    data = defaultdict(dict)
    hpos_ready, remainings = fetch_hpos_valid_curves(
        client, namespaces, variable_names, data)

    ready_configs = get_ready_configs(hpos_ready, configs, to_replicate)

    replicates = generate_replicates(
        ready_configs, data, variables, objective, hpo_budget,
        num_replicates, early_stopping=False)

    register(client, foo, NAMESPACE, replicates)

    worker = TrialWorker(URI, DATABASE, 0, None)
    worker.max_retry = 0
    worker.timeout = 0.02
    worker.run()

    print(fetch_vars_stats(client, NAMESPACE))

    data = fetch_hpos_replicates(
        client, configs, replicates, variable_names, space, data)
    data = consolidate_results(data)

    assert len(data) == 1
    assert len(data['random_search']) == 4

    hpo_reps = data['random_search']
    assert hpo_reps['ideal'].obj.shape == (6, surrogate_budget, num_experiments)
    assert hpo_reps['biased'].obj.shape == (6, num_replicates, num_experiments)
    assert hpo_reps['simul-fix'].obj.shape == (6, num_replicates, num_experiments)
    assert hpo_reps['simul-free'].obj.shape == (6, num_replicates, num_experiments)

    def count_unique(attr):
        return len(set(attr.values.reshape(-1).tolist()))

    # Test sources of variation
    # NOTE: In ideal, source of variation will vary across ideal after
    #       consolidation but it stays fixed during the HPO itself
    assert count_unique(hpo_reps['ideal']['d']) == num_experiments
    assert count_unique(hpo_reps['biased']['d']) == (num_replicates * num_experiments)
    assert count_unique(hpo_reps['simul-free']['d']) == (num_replicates * num_experiments)
    assert count_unique(hpo_reps['simul-fix']['d']) == num_experiments

    # Test HPs
    assert count_unique(hpo_reps['ideal']['a']) == (num_experiments * surrogate_budget)
    assert count_unique(hpo_reps['biased']['a']) == num_experiments
    assert count_unique(hpo_reps['simul-free']['a']) == (num_replicates * num_experiments)
    assert count_unique(hpo_reps['simul-fix']['a']) == (num_replicates * num_experiments)
    assert numpy.allclose(
        hpo_reps['simul-free']['a'].values,
        hpo_reps['simul-fix']['a'].values)