def test_sweep_hyperparameters(self):
    """Check that the sweeper samples each parameter's range evenly.

    v1 is linear on [-10, 10] and v2 is log-uniform on [1e-5, 1e-1];
    splitting each range at 0 and 1e-3 respectively partitions the sample
    space into four quadrants that should each be hit with probability
    0.25, so every quadrant count should look like a Binomial(n, 0.25)
    draw.
    """
    sweeper = hp.RandomHyperparameterSweeper([
        hp.LinearFloatParam("v1", -10, 10),
        hp.LogFloatParam("v2", 1e-5, 1e-1),
    ])
    num_sweeps = 100
    quadrant_counts = np.zeros((2, 2))
    v1_split = 0
    v2_split = 1e-3

    def tally(v1, v2):
        # Bucket each sampled pair by which side of each split it fell on.
        quadrant_counts[int(v1 > v1_split), int(v2 > v2_split)] += 1

    sweeper.sweep_hyperparameters(tally, num_sweeps)
    expected_fraction = 0.25
    for row in range(2):
        for col in range(2):
            self.assertTrue(is_binomial_trial_likely(
                num_sweeps, expected_fraction, quadrant_counts[row, col]))
def main():
    """Launch an EC2 sweep of DDPG on cartpole over 40 random hyperparameter draws."""
    num_hyperparameters = 40
    layer_norm = True
    # NOTE(review): reward_scale bounds are written (10.0, 0.001), i.e.
    # max-before-min — presumably LogFloatParam accepts either order; confirm.
    sweeper = hp.RandomHyperparameterSweeper([
        hp.LogFloatParam("qf_learning_rate", 1e-5, 1e-1),
        hp.LogFloatParam("policy_learning_rate", 1e-5, 1e-1),
        hp.LogFloatParam("reward_scale", 10.0, 0.001),
        hp.LogFloatParam("discount", 0.5, 0.99),
    ])
    for seed in range(num_hyperparameters):
        sampled = sweeper.generate_random_hyperparameters()
        # Fixed algorithm settings, overlaid with the sampled hyperparameters.
        algo_params = dict(
            batch_size=128,
            n_epochs=50,
            epoch_length=1000,
            eval_samples=1000,
            replay_pool_size=1000000,
            min_pool_size=256,
            max_path_length=1000,
            qf_weight_decay=0.00,
            n_updates_per_time_step=5,
            soft_target_tau=0.01,
            **sampled,
        )
        variant = dict(
            algo_params=algo_params,
            env_params=dict(
                env_id='cart',
                normalize_env=True,
                gym_name="",
            ),
            policy_params=dict(layer_norm=layer_norm),
            qf_params=dict(layer_norm=layer_norm),
        )
        run_experiment(
            my_ddpg_launcher,
            exp_prefix="3-16-cartpole-ddpg-sweep-test",
            seed=seed,
            variant=variant,
            mode="ec2",
        )
def get_launch_settings(algo_name):
    """Build the launch configuration for one algorithm.

    :param algo_name: Name of the algorithm to run. One of
        'ddpg', 'mddpg', 'naf', 'random', or 'bptt'.
    :return: Dictionary with keys:
        - 'sweeper': hyperparameter sweeper for the algorithm
        - 'batch_norm_sweeper': always None currently
        - 'variant': variant dict to pass to run_algorithm
        - 'algo_params': algo_params to pass to run_algorithm
        - 'algorithm_launcher': launcher callable for the algorithm
        - 'batch_norm_params': module-level BATCH_NORM_PARAMS
    :raises Exception: if algo_name is not recognized.
    """
    # Defaults used by the branches that don't sweep anything.
    sweeper = hp.RandomHyperparameterSweeper()
    algo_params = {}
    if algo_name in ('ddpg', 'mddpg'):
        sweeper = hp.RandomHyperparameterSweeper([
            hp.LogFloatParam("qf_learning_rate", 1e-5, 1e-2),
            hp.LogFloatParam("policy_learning_rate", 1e-6, 1e-3),
            hp.LogFloatParam("reward_scale", 10.0, 0.001),
            hp.LogFloatParam("soft_target_tau", 1e-5, 1e-2),
        ])
        algo_params = get_ddpg_params()
        algo_params['render'] = render  # `render` is a module-level setting
        variant = {
            'qf_params': dict(
                embedded_hidden_sizes=(100,),
                observation_hidden_sizes=(100,),
                hidden_nonlinearity=tf.nn.relu,
            ),
            'policy_params': dict(
                observation_hidden_sizes=(100, 100),
                hidden_nonlinearity=tf.nn.relu,
            ),
        }
        if algo_name == 'ddpg':
            algorithm_launcher = my_ddpg_launcher
            variant['Algorithm'] = 'DDPG'
            # Only plain DDPG squashes the policy output.
            variant['policy_params']['output_nonlinearity'] = tf.nn.tanh
        else:
            algorithm_launcher = mem_ddpg_launcher
            variant['Algorithm'] = 'Memory-DDPG'
    elif algo_name == 'naf':
        sweeper = hp.RandomHyperparameterSweeper([
            hp.LogFloatParam("qf_learning_rate", 1e-5, 1e-2),
            hp.LogFloatParam("reward_scale", 10.0, 0.001),
            hp.LogFloatParam("soft_target_tau", 1e-6, 1e-1),
            hp.LogFloatParam("qf_weight_decay", 1e-7, 1e-1),
        ])
        algo_params = get_my_naf_params()
        algo_params['render'] = render
        algorithm_launcher = naf_launcher
        variant = {
            'Algorithm': 'NAF',
            'exploration_strategy_params': {
                'sigma': 0.15
            },
        }
    elif algo_name == 'random':
        algorithm_launcher = random_action_launcher
        variant = {'Algorithm': 'Random'}
    elif algo_name == 'bptt':
        algorithm_launcher = bptt_launcher
        variant = {'Algorithm': 'BPTT'}
    else:
        raise Exception("Algo name not recognized: " + algo_name)
    # Batch-norm sweeping is currently disabled.
    bn_sweeper = None
    return {
        'sweeper': sweeper,
        'batch_norm_sweeper': bn_sweeper,
        'variant': variant,
        'algo_params': algo_params,
        'algorithm_launcher': algorithm_launcher,
        'batch_norm_params': BATCH_NORM_PARAMS
    }
mode=mode, variant=variant, exp_id=exp_id, sync_s3_log=True, sync_s3_pkl=True, periodic_sync_interval=600, ) if run_mode == 'random': hyperparameters = [ hyp.LinearFloatParam('algo_params.discount', 0, 1), hyp.LogFloatParam('algo_params.policy_learning_rate', 1e-7, 1e-1), hyp.LogFloatParam('algo_params.qf_learning_rate', 1e-7, 1e-1), hyp.LogIntParam('algo_params.target_hard_update_period', 1, 1000), ] sweeper = hyp.RandomHyperparameterSweeper( hyperparameters, default_kwargs=variant, ) for _ in range(num_configurations): for exp_id in range(n_seeds): seed = random.randint(0, 10000) variant = sweeper.generate_random_hyperparameters() run_experiment( experiment, exp_prefix=exp_prefix, seed=seed, mode=mode, variant=variant, exp_id=exp_id, sync_s3_log=True, sync_s3_pkl=True, periodic_sync_interval=600,