def test_log_float_param(self):
    """Check LogFloatParam sampling over [1e-5, 1e-1].

    With a log-uniform draw on [1e-5, 1e-1], the geometric midpoint is
    1e-3, so "sample > 1e-3" should behave like a fair coin flip; the
    count of successes over many draws is checked against a Binomial(n, 0.5)
    plausibility test.
    """
    param = hp.LogFloatParam("variable", 1e-5, 1e-1)
    num_trials = 10000
    cutoff = 1e-3
    # Count draws that land in the upper half of the log-range.
    hits = sum(1 for _ in range(num_trials) if param.generate() > cutoff)
    self.assertTrue(is_binomial_trial_likely(num_trials, 0.5, hits))
def main():
    """Launch a random-search DDPG sweep on cartpole, one EC2 job per sample."""
    num_hyperparameters = 40
    layer_norm = True
    sweeper = hp.RandomHyperparameterSweeper([
        hp.LogFloatParam("qf_learning_rate", 1e-5, 1e-1),
        hp.LogFloatParam("policy_learning_rate", 1e-5, 1e-1),
        hp.LogFloatParam("reward_scale", 10.0, 0.001),
        hp.LogFloatParam("discount", 0.5, 0.99),
    ])
    for seed in range(num_hyperparameters):
        sampled_params = sweeper.generate_random_hyperparameters()
        # Fixed algorithm settings, with the sampled hyperparameters merged in.
        algo_params = dict(
            batch_size=128,
            n_epochs=50,
            epoch_length=1000,
            eval_samples=1000,
            replay_pool_size=1000000,
            min_pool_size=256,
            max_path_length=1000,
            qf_weight_decay=0.00,
            n_updates_per_time_step=5,
            soft_target_tau=0.01,
            **sampled_params
        )
        variant = dict(
            algo_params=algo_params,
            env_params=dict(
                env_id='cart',
                normalize_env=True,
                gym_name="",
            ),
            policy_params=dict(layer_norm=layer_norm, ),
            qf_params=dict(layer_norm=layer_norm, ),
        )
        run_experiment(
            my_ddpg_launcher,
            exp_prefix="3-16-cartpole-ddpg-sweep-test",
            seed=seed,
            variant=variant,
            mode="ec2",
        )
def test_sweep_hyperparameters(self):
    """Check that sweep_hyperparameters samples both params independently.

    v1 is uniform on [-10, 10] (midpoint 0) and v2 is log-uniform on
    [1e-5, 1e-1] (geometric midpoint 1e-3), so each of the four
    above/below-midpoint combinations should occur with probability 0.25.
    """
    sweeper = hp.RandomHyperparameterSweeper([
        hp.LinearFloatParam("v1", -10, 10),
        hp.LogFloatParam("v2", 1e-5, 1e-1),
    ])
    num_trials = 100
    counts = np.zeros((2, 2))
    midpoint_v1 = 0
    midpoint_v2 = 1e-3

    def tally(v1, v2):
        # Bucket each sample by which side of its midpoint it fell on.
        row = int(v1 > midpoint_v1)
        col = int(v2 > midpoint_v2)
        counts[row, col] += 1

    sweeper.sweep_hyperparameters(tally, num_trials)
    expected_p = 0.25
    for row in range(2):
        for col in range(2):
            self.assertTrue(
                is_binomial_trial_likely(num_trials, expected_p,
                                         counts[row, col]))
def get_launch_settings(algo_name):
    """
    Build the full launch configuration for the requested algorithm.

    :param algo_name: One of 'ddpg', 'mddpg', 'naf', 'random', or 'bptt'.
    :return: Dictionary with keys:
        'sweeper': hyperparameter sweeper for the algorithm (empty for
            algorithms with nothing to sweep)
        'batch_norm_sweeper': sweeper over batch-norm params (currently None)
        'variant': variant dictionary describing the experiment
        'algo_params': parameters for the algorithm itself
        'algorithm_launcher': callable that launches the algorithm
        'batch_norm_params': BATCH_NORM_PARAMS
    :raises Exception: if algo_name is not recognized.
    """
    # Defaults; used as-is by the 'random' and 'bptt' branches, which have
    # no hyperparameters to sweep.
    sweeper = hp.RandomHyperparameterSweeper()
    algo_params = {}
    if algo_name == 'ddpg' or algo_name == 'mddpg':
        sweeper = hp.RandomHyperparameterSweeper([
            hp.LogFloatParam("qf_learning_rate", 1e-5, 1e-2),
            hp.LogFloatParam("policy_learning_rate", 1e-6, 1e-3),
            # NOTE(review): bounds given high-to-low here (and elsewhere in
            # this file) -- assumes LogFloatParam accepts either order; confirm.
            hp.LogFloatParam("reward_scale", 10.0, 0.001),
            hp.LogFloatParam("soft_target_tau", 1e-5, 1e-2),
        ])
        algo_params = get_ddpg_params()
        algo_params['render'] = render
        variant = {
            'qf_params': dict(
                embedded_hidden_sizes=(100,),
                observation_hidden_sizes=(100,),
                hidden_nonlinearity=tf.nn.relu,
            ),
            'policy_params': dict(
                observation_hidden_sizes=(100, 100),
                hidden_nonlinearity=tf.nn.relu,
            )
        }
        if algo_name == 'ddpg':
            algorithm_launcher = my_ddpg_launcher
            variant['Algorithm'] = 'DDPG'
            variant['policy_params']['output_nonlinearity'] = tf.nn.tanh
        else:
            algorithm_launcher = mem_ddpg_launcher
            variant['Algorithm'] = 'Memory-DDPG'
    elif algo_name == 'naf':
        sweeper = hp.RandomHyperparameterSweeper([
            hp.LogFloatParam("qf_learning_rate", 1e-5, 1e-2),
            hp.LogFloatParam("reward_scale", 10.0, 0.001),
            hp.LogFloatParam("soft_target_tau", 1e-6, 1e-1),
            hp.LogFloatParam("qf_weight_decay", 1e-7, 1e-1),
        ])
        algo_params = get_my_naf_params()
        algo_params['render'] = render
        algorithm_launcher = naf_launcher
        variant = {
            'Algorithm': 'NAF',
            'exploration_strategy_params': {
                'sigma': 0.15
            },
        }
    elif algo_name == 'random':
        algorithm_launcher = random_action_launcher
        variant = {'Algorithm': 'Random'}
    elif algo_name == 'bptt':
        algorithm_launcher = bptt_launcher
        variant = {'Algorithm': 'BPTT'}
    else:
        raise Exception("Algo name not recognized: " + algo_name)

    # bn_sweeper = hp.RandomHyperparameterSweeper([
    #     hp.EnumParam("decay", [0.9, 0.99, 0.999, 0.9999]),
    #     hp.LogFloatParam("epsilon", 1e-3, 1e-7),
    #     hp.EnumParam("enable_offset", [True, False]),
    #     hp.EnumParam("enable_scale", [True, False]),
    # ])
    bn_sweeper = None
    return {
        'sweeper': sweeper,
        'batch_norm_sweeper': bn_sweeper,
        'variant': variant,
        'algo_params': algo_params,
        'algorithm_launcher': algorithm_launcher,
        'batch_norm_params': BATCH_NORM_PARAMS
    }
seed = random.randint(0, 10000) run_experiment( experiment, exp_prefix=exp_prefix, seed=seed, mode=mode, variant=variant, exp_id=exp_id, sync_s3_log=True, sync_s3_pkl=True, periodic_sync_interval=600, ) if run_mode == 'random': hyperparameters = [ hyp.LinearFloatParam('algo_params.discount', 0, 1), hyp.LogFloatParam('algo_params.policy_learning_rate', 1e-7, 1e-1), hyp.LogFloatParam('algo_params.qf_learning_rate', 1e-7, 1e-1), hyp.LogIntParam('algo_params.target_hard_update_period', 1, 1000), ] sweeper = hyp.RandomHyperparameterSweeper( hyperparameters, default_kwargs=variant, ) for _ in range(num_configurations): for exp_id in range(n_seeds): seed = random.randint(0, 10000) variant = sweeper.generate_random_hyperparameters() run_experiment( experiment, exp_prefix=exp_prefix, seed=seed,
) if run_mode == 'random': for ( rnn_cell, output_activation, ) in [ (LSTMCell, F.tanh), (LSTMCell, ptu.clip1), (GRUCell, F.tanh), (GRUCell, ptu.clip1), ]: variant['policy_params']['cell_class'] = rnn_cell variant['policy_params']['output_activation'] = output_activation hyperparameters = [ hyp.LogIntParam('memory_dim', 4, 400), hyp.LogFloatParam('algo_params.qf_learning_rate', 1e-5, 1e-2), hyp.LogFloatParam( 'algo_params.write_policy_learning_rate', 1e-5, 1e-3 ), hyp.LogFloatParam( 'algo_params.action_policy_learning_rate', 1e-5, 1e-3 ), # hyp.EnumParam( # 'algo_params.action_policy_optimize_bellman', [True, False], # ), # hyp.EnumParam( # 'algo_params.use_action_policy_params_for_entire_policy', # [True, False], # ), # hyp.EnumParam( # 'algo_params.write_policy_optimizes', ['both', 'qf', 'bellman']
n_seeds, experiment, exp_prefix=exp_prefix, ), search_space=search_space, extra_function_kwargs=variant, maximize=True, verbose=True, load_trials=True, num_rounds=500, num_evals_per_round=1, ) elif run_mode == 'random': hyperparameters = [ hyp.LinearFloatParam('foo', 0, 1), hyp.LogFloatParam('bar', 1e-5, 1e2), ] sweeper = hyp.RandomHyperparameterSweeper( hyperparameters, default_kwargs=variant, ) for _ in range(num_configurations): for exp_id in range(n_seeds): seed = random.randint(0, 10000) variant = sweeper.generate_random_hyperparameters() run_experiment( experiment, exp_prefix=exp_prefix, seed=seed, mode=mode, variant=variant,