示例#1
0
    # Kick off the training loop (trainer is constructed above, outside this chunk).
    trainer.train()


if __name__ == '__main__':
    # Hyperparameter grid for the meta-RL sweep.  Every value is a *list*
    # of candidate settings; run_sweep expands the cross product of all
    # lists into individual experiment runs.
    env_classes = [
        HalfCheetahRandDirecEnv, AntRandDirecEnv, HopperRandParamsEnv,
        Walker2DRandDirecEnv, HumanoidRandDirec2DEnv, Walker2DRandParamsEnv,
    ]

    # Sampling-related settings.
    sampling_config = {
        'seed': [1, 2, 3],
        'env': env_classes,
        'rollouts_per_meta_task': [20],
        'max_path_length': [100],
        'parallel': [True],
    }
    # Advantage-processing settings.
    advantage_config = {
        'discount': [0.99],
        'normalize_adv': [True],
        'positive_adv': [False],
    }
    # Policy-network architecture.
    policy_config = {
        'hidden_sizes': [(64, 64)],
        'learn_std': [True],
        'hidden_nonlinearity': [tf.tanh],
        'output_nonlinearity': [None],
    }
    # Optimization / meta-learning settings.
    training_config = {
        'inner_lr': [0.1],
        'learning_rate': [1e-3],
        'n_itr': [1001],
        'meta_batch_size': [40],
        'num_inner_grad_steps': [1],
        'scope': [None],
    }

    # Merge the groups; insertion order matches the original flat literal.
    sweep_params = {}
    for group in (sampling_config, advantage_config, policy_config,
                  training_config):
        sweep_params.update(group)

    run_sweep(run_experiment, sweep_params, EXP_NAME, INSTANCE_TYPE)
示例#2
0
            # Record per-iteration diagnostics and flush them to the logger's sinks.
            logger.logkv("iter", i)
            logger.logkv("iter_time", timer.time() - start)
            logger.dumpkvs()
            # After the first iteration, freeze the TF graph so any later
            # attempt to add ops raises immediately (guards against graph leaks).
            if i == 0:
                sess.graph.finalize()


if __name__ == '__main__':
    # Hyperparameter grid passed to run_sweep; each value is a list of
    # candidate settings.
    #
    # NOTE: the previous version of this dict listed 'env', 'obs_type' and
    # 'process_type' twice each.  In a Python dict literal the later entry
    # silently shadows the earlier one, so only the surviving values are
    # kept here; the discarded alternatives are preserved as comments.
    sweep_params = {
        'alg': ['her'],
        'seed': [399856203240],
        # Alternatives: 'HandManipulatePen-v0', 'HandManipulateBlock-v0'
        'env': ['HandManipulateEgg-v0'],

        # Env Sampling
        'fixed_num_of_contact': [7],

        # Problem Conf
        'obs_type': ['full_contact'],  # alternative: 'contact'
        'process_type': ['pointnet'],  # alternative: 'max_pool'
        'prediction': ['object'],
        'feature_dim': [128],
        'feature_layer': [0],
    }
    run_sweep(main, sweep_params, EXP_NAME, INSTANCE_TYPE)
         ],  # Dummy; we don't time out episodes (they time out by themselves)
        "gae_lambda": [.99],
        "normalize_adv": [True],
        "positive_adv": [False],
    }
    # Sentinel string used to detect which sweep keys were actually supplied
    # on the command line; flags left at this value were not overridden.
    DEFAULT = 'DEFAULT'
    parser = argparse.ArgumentParser()
    # Expose every sweep key as an optional --<key> flag.
    for key, value in sweep_params.items():
        parser.add_argument(f'--{key}', default=DEFAULT)
    args = parser.parse_args()
    # Any flag the user did set replaces the corresponding sweep list with a
    # single value, coerced via args_type (helper defined elsewhere in this
    # file) to the type of the first default entry.
    for k in vars(args):
        v = getattr(args, k)
        if not v == DEFAULT:
            arg_type = args_type(sweep_params[k][0])
            sweep_params[k] = [arg_type(v)]

    # DEBUG HPARAMS
    # Shrink every expensive setting so a debug run finishes quickly.
    if DEBUG:
        sweep_params['meta_batch_size'] = [2]
        sweep_params['success_threshold'] = [0]
        sweep_params['accuracy_threshold'] = [0]
        sweep_params['hidden_sizes'] = [(2, )]
        sweep_params['backprop_steps'] = [1]
        sweep_params['max_path_length'] = [3]
        sweep_params['parallel'] = [False]
        sweep_params["memory_dim"] = [3]  # 2048
        sweep_params["instr_dim"] = [4]  # 256

    # NOTE(review): assumes sweep_params contains a 'prefix' key defined
    # earlier in this file (not visible in this chunk) — confirm before editing.
    run_sweep(run_experiment, sweep_params, sweep_params['prefix'][0], parser,
              INSTANCE_TYPE)
        'use_cem': [False],
        'num_cem_iters': [5],  # presumably only used when use_cem is True — confirm

        # Training
        'dynamics_learning_rate': [5e-4, 0.001],  # two entries -> both are swept
        'valid_split_ratio': [0.1],
        'initial_random_samples': [True],
        'initial_sinusoid_samples': [False],

        # Dynamics Model
        'recurrent': [False],
        'num_models': [5],  # size of the dynamics-model ensemble
        'dynamics_hidden_nonlinearity': ['swish'],
        'dynamics_output_nonlinearity': [None],
        'dynamics_hidden_sizes': [(512, 512, 512)],
        'dynamic_model_epochs': [50],  # UNUSED
        'dynamics_buffer_size': [25000],
        'backprop_steps': [100],
        'weight_normalization_model': [False],  # FIXME: Doesn't work
        'dynamics_batch_size': [64],
        'cell_type': ['lstm'],  # presumably only relevant when recurrent is True — confirm

        #  Other
        'n_parallel': [1],
        'exp_tag': ['parallel-mbmpc']
    }

    # Sanity-check the sweep config: candidates must divide evenly across the
    # model ensemble.  Raise explicitly instead of using `assert`, which is
    # silently stripped when Python runs with -O.  FIXME: remove constraint.
    if config['n_candidates'][0] % config['num_models'][0] != 0:
        raise ValueError(
            "n_candidates (%d) must be a multiple of num_models (%d)"
            % (config['n_candidates'][0], config['num_models'][0])
        )

    run_sweep(run_experiment, config, EXP_NAME, INSTANCE_TYPE)
示例#5
0
        'normalize': [False],
        'n_itr': [30],  # number of outer training iterations
        'discount': [0.99],

        # Policy
        'n_candidates': [128],
        'horizon': [5],  # planning horizon
        'use_cem': [False],
        'num_cem_iters': [4],  # presumably only used when use_cem is True — confirm

        # Training
        'num_rollouts': [20],
        'learning_rate': [0.001],
        'valid_split_ratio': [0.1],
        # NOTE(review): key is misspelled ('persitency' -> 'persistency');
        # the consumer presumably looks up the misspelled name, so it is
        # kept verbatim — verify downstream usage before renaming.
        'rolling_average_persitency': [0.99],
        'initial_random_samples': [True],

        # Dynamics Model
        'recurrent': [False],
        'num_models': [3],  # size of the dynamics-model ensemble
        'hidden_nonlinearity_model': ['relu'],
        'hidden_sizes_model': [(512, 512)],
        'dynamic_model_epochs': [200],
        'backprop_steps': [100],
        'weight_normalization_model': [False],  # FIXME: Doesn't work
        'batch_size_model': [256],
        'cell_type': ['lstm'],  # presumably only relevant when recurrent is True — confirm
    }

    run_sweep(run_experiment, config, EXP_NAME)
示例#6
0
        'num_models': [10],
        'dynamics_hidden_sizes': [(512, 512)],
        # NOTE(review): the next two keys are misspelled ('dyanmics' vs
        # 'dynamics').  If the consumer looks up the correctly spelled
        # names these settings are silently ignored and defaults apply —
        # verify downstream usage before renaming (keys are runtime strings).
        'dyanmics_hidden_nonlinearity': ['relu'],
        'dyanmics_output_nonlinearity': [None],
        'dynamics_max_epochs': [50],
        'dynamics_learning_rate': [1e-3],
        'dynamics_batch_size': [128],
        'dynamics_buffer_size': [5000],


        # Policy
        'policy_hidden_sizes': [(64, 64)],
        'policy_learn_std': [True],
        'policy_hidden_nonlinearity': [tf.tanh],
        'policy_output_nonlinearity': [None],

        # Meta-Algo
        'meta_batch_size': [10],  # Note: It has to be multiple of num_models
        'rollouts_per_meta_task': [50],
        'num_inner_grad_steps': [1],
        'inner_lr': [0.001],
        'inner_type': ['log_likelihood'],
        'step_size': [0.01],
        'exploration': [False],

        'scope': [None],
        'exp_tag': [''], # For changes besides hyperparams
    }

    run_sweep(run_experiment, sweep_params, EXP_NAME)