Example #1
    def test_log_float_param(self):
        param = hp.LogFloatParam("variable", 1e-5, 1e-1)
        n = 10000
        num_success = 0
        threshold = 1e-3
        for _ in range(n):
            if param.generate() > threshold:
                num_success += 1
        p = 0.5
        self.assertTrue(is_binomial_trial_likely(n, p, num_success))
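The test asserts p = 0.5 because, if LogFloatParam samples uniformly in log space, the threshold 1e-3 is the geometric mean of the bounds 1e-5 and 1e-1, so half of all draws should land above it. A minimal sketch of that assumed sampling behavior (hypothetical, not the library's actual source):

import math
import random

class LogUniformParam:
    """Hypothetical stand-in for hp.LogFloatParam: samples a value
    uniformly in log10 space between min_value and max_value."""

    def __init__(self, name, min_value, max_value):
        self.name = name
        self.log_min = math.log10(min_value)
        self.log_max = math.log10(max_value)

    def generate(self):
        # Uniform in log space => log-uniform in linear space.
        return 10 ** random.uniform(self.log_min, self.log_max)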
Example #2
def main():
    num_hyperparameters = 40
    layer_norm = True
    sweeper = hp.RandomHyperparameterSweeper([
        hp.LogFloatParam("qf_learning_rate", 1e-5, 1e-1),
        hp.LogFloatParam("policy_learning_rate", 1e-5, 1e-1),
        hp.LogFloatParam("reward_scale", 10.0, 0.001),
        hp.LogFloatParam("discount", 0.5, 0.99),
    ])
    for seed in range(num_hyperparameters):
        params_dict = sweeper.generate_random_hyperparameters()
        variant = dict(
            algo_params=dict(batch_size=128,
                             n_epochs=50,
                             epoch_length=1000,
                             eval_samples=1000,
                             replay_pool_size=1000000,
                             min_pool_size=256,
                             max_path_length=1000,
                             qf_weight_decay=0.00,
                             n_updates_per_time_step=5,
                             soft_target_tau=0.01,
                             **params_dict),
            env_params=dict(
                env_id='cart',
                normalize_env=True,
                gym_name="",
            ),
        policy_params=dict(layer_norm=layer_norm),
        qf_params=dict(layer_norm=layer_norm),
        )
        run_experiment(
            my_ddpg_launcher,
            exp_prefix="3-16-cartpole-ddpg-sweep-test",
            seed=seed,
            variant=variant,
            mode="ec2",
        )
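Each call to generate_random_hyperparameters() draws one fresh value per parameter, and the resulting {name: value} dict is merged into algo_params via **params_dict. A minimal sketch of what the sweeper presumably does per call (hypothetical, assuming each parameter object exposes name and generate()):

class RandomSweeperSketch:
    """Hypothetical stand-in for hp.RandomHyperparameterSweeper."""

    def __init__(self, params):
        self.params = params

    def generate_random_hyperparameters(self):
        # One independent draw per hyperparameter, keyed by name.
        return {param.name: param.generate() for param in self.params}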
Example #3
    def test_sweep_hyperparameters(self):
        sweeper = hp.RandomHyperparameterSweeper([
            hp.LinearFloatParam("v1", -10, 10),
            hp.LogFloatParam("v2", 1e-5, 1e-1),
        ])
        n = 100
        num_successes = np.zeros((2, 2))
        threshold_v1 = 0
        threshold_v2 = 1e-3

        def update_success(v1, v2):
            success_v1 = int(v1 > threshold_v1)
            success_v2 = int(v2 > threshold_v2)
            num_successes[success_v1, success_v2] += 1

        sweeper.sweep_hyperparameters(update_success, n)
        p = 0.25
        for i in range(2):
            for j in range(2):
                self.assertTrue(
                    is_binomial_trial_likely(n, p, num_successes[i, j]))
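Here each (v1, v2) quadrant should be hit with probability 0.25: v1 straddles 0 (the midpoint of -10..10) and v2 straddles 1e-3 (the log-space midpoint of 1e-5..1e-1), and the two draws are independent. One way is_binomial_trial_likely could work is a normal-approximation check; the tolerance below is an assumption, not the helper's actual definition:

import math

def binomial_count_is_plausible(n, p, num_successes, z=4.0):
    """Hypothetical version of is_binomial_trial_likely: accept the
    observed count if it lies within z standard deviations of the
    binomial mean n * p."""
    mean = n * p
    std = math.sqrt(n * p * (1 - p))
    return abs(num_successes - mean) <= z * std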
Example #4
    def get_launch_settings(algo_name):
        """
        Return a dictionary of the form
        {
            'algo_params': algo_params to pass to run_algorithm
            'variant': variant to pass to run_algorithm
        }
        :param algo_name: Name of the algorithm to run.
        :return:
        """
        sweeper = hp.RandomHyperparameterSweeper()
        algo_params = {}
        if algo_name == 'ddpg' or algo_name == 'mddpg':
            sweeper = hp.RandomHyperparameterSweeper([
                hp.LogFloatParam("qf_learning_rate", 1e-5, 1e-2),
                hp.LogFloatParam("policy_learning_rate", 1e-6, 1e-3),
                hp.LogFloatParam("reward_scale", 10.0, 0.001),
                hp.LogFloatParam("soft_target_tau", 1e-5, 1e-2),
            ])
            algo_params = get_ddpg_params()
            algo_params['render'] = render
            variant = {
                'qf_params': dict(
                    embedded_hidden_sizes=(100,),
                    observation_hidden_sizes=(100,),
                    hidden_nonlinearity=tf.nn.relu,
                ),
                'policy_params': dict(
                    observation_hidden_sizes=(100, 100),
                    hidden_nonlinearity=tf.nn.relu,
                )
            }
            if algo_name == 'ddpg':
                algorithm_launcher = my_ddpg_launcher
                variant['Algorithm'] = 'DDPG'
                variant['policy_params']['output_nonlinearity'] = tf.nn.tanh
            else:
                algorithm_launcher = mem_ddpg_launcher
                variant['Algorithm'] = 'Memory-DDPG'
        elif algo_name == 'naf':
            sweeper = hp.RandomHyperparameterSweeper([
                hp.LogFloatParam("qf_learning_rate", 1e-5, 1e-2),
                hp.LogFloatParam("reward_scale", 10.0, 0.001),
                hp.LogFloatParam("soft_target_tau", 1e-6, 1e-1),
                hp.LogFloatParam("qf_weight_decay", 1e-7, 1e-1),
            ])
            algo_params = get_my_naf_params()
            algo_params['render'] = render
            algorithm_launcher = naf_launcher
            variant = {
                'Algorithm': 'NAF',
                'exploration_strategy_params': {
                    'sigma': 0.15
                },
            }
        elif algo_name == 'random':
            algorithm_launcher = random_action_launcher
            variant = {'Algorithm': 'Random'}
        elif algo_name == 'bptt':
            algorithm_launcher = bptt_launcher
            variant = {'Algorithm': 'BPTT'}
        else:
            raise Exception("Algo name not recognized: " + algo_name)

        # bn_sweeper = hp.RandomHyperparameterSweeper([
        #     hp.EnumParam("decay", [0.9, 0.99, 0.999, 0.9999]),
        #     hp.LogFloatParam("epsilon", 1e-3, 1e-7),
        #     hp.EnumParam("enable_offset", [True, False]),
        #     hp.EnumParam("enable_scale", [True, False]),
        # ])
        bn_sweeper = None
        return {
            'sweeper': sweeper,
            'batch_norm_sweeper': bn_sweeper,
            'variant': variant,
            'algo_params': algo_params,
            'algorithm_launcher': algorithm_launcher,
            'batch_norm_params': BATCH_NORM_PARAMS
        }
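The commented-out bn_sweeper mixes LogFloatParam with EnumParam, which samples from a discrete set of candidates rather than a continuous range. A plausible sketch of such a parameter (hypothetical; the real hp.EnumParam may differ):

import random

class EnumChoiceParam:
    """Hypothetical stand-in for hp.EnumParam: picks one of the
    candidate values uniformly at random."""

    def __init__(self, name, values):
        self.name = name
        self.values = values

    def generate(self):
        return random.choice(self.values)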
Example #5
if run_mode == 'random':
    hyperparameters = [
        hyp.LinearFloatParam('algo_params.discount', 0, 1),
        hyp.LogFloatParam('algo_params.policy_learning_rate', 1e-7, 1e-1),
        hyp.LogFloatParam('algo_params.qf_learning_rate', 1e-7, 1e-1),
        hyp.LogIntParam('algo_params.target_hard_update_period', 1, 1000),
    ]
    sweeper = hyp.RandomHyperparameterSweeper(
        hyperparameters,
        default_kwargs=variant,
    )
    for _ in range(num_configurations):
        for exp_id in range(n_seeds):
            seed = random.randint(0, 10000)
            variant = sweeper.generate_random_hyperparameters()
            run_experiment(
                experiment,
                exp_prefix=exp_prefix,
                seed=seed,
                mode=mode,
                variant=variant,
                exp_id=exp_id,
                sync_s3_log=True,
                sync_s3_pkl=True,
                periodic_sync_interval=600,
            )
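Unlike the earlier examples, the parameter names here are dotted paths such as 'algo_params.discount', and the sweeper is given default_kwargs=variant, which suggests each sampled value is written into the matching nested slot of a copy of the defaults. A minimal sketch of that merge (the helper below is hypothetical):

def set_dotted(d, dotted_key, value):
    """Write value into a nested dict, so that
    set_dotted(variant, 'algo_params.discount', 0.9) performs
    variant['algo_params']['discount'] = 0.9."""
    *path, leaf = dotted_key.split('.')
    for key in path:
        d = d.setdefault(key, {})
    d[leaf] = value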
Example #6
if run_mode == 'random':
    for (
        rnn_cell,
        output_activation,
    ) in [
        (LSTMCell, F.tanh),
        (LSTMCell, ptu.clip1),
        (GRUCell, F.tanh),
        (GRUCell, ptu.clip1),
    ]:
        variant['policy_params']['cell_class'] = rnn_cell
        variant['policy_params']['output_activation'] = output_activation
        hyperparameters = [
            hyp.LogIntParam('memory_dim', 4, 400),
            hyp.LogFloatParam('algo_params.qf_learning_rate', 1e-5, 1e-2),
            hyp.LogFloatParam(
                'algo_params.write_policy_learning_rate', 1e-5, 1e-3
            ),
            hyp.LogFloatParam(
                'algo_params.action_policy_learning_rate', 1e-5, 1e-3
            ),
            # hyp.EnumParam(
            #     'algo_params.action_policy_optimize_bellman', [True, False],
            # ),
            # hyp.EnumParam(
            #     'algo_params.use_action_policy_params_for_entire_policy',
            #     [True, False],
            # ),
            # hyp.EnumParam(
            #     'algo_params.write_policy_optimizes', ['both', 'qf', 'bellman']
            # ),
        ]
Example #7
            n_seeds,
            experiment,
            exp_prefix=exp_prefix,
        ),
        search_space=search_space,
        extra_function_kwargs=variant,
        maximize=True,
        verbose=True,
        load_trials=True,
        num_rounds=500,
        num_evals_per_round=1,
    )
elif run_mode == 'random':
    hyperparameters = [
        hyp.LinearFloatParam('foo', 0, 1),
        hyp.LogFloatParam('bar', 1e-5, 1e2),
    ]
    sweeper = hyp.RandomHyperparameterSweeper(
        hyperparameters,
        default_kwargs=variant,
    )
    for _ in range(num_configurations):
        for exp_id in range(n_seeds):
            seed = random.randint(0, 10000)
            variant = sweeper.generate_random_hyperparameters()
            run_experiment(
                experiment,
                exp_prefix=exp_prefix,
                seed=seed,
                mode=mode,
                variant=variant,
            )