Example #1
def run_experiment(**kwargs):
    exp_dir = os.getcwd() + '/data/' + EXP_NAME
    exp_dir += "/" + kwargs['opt_type'] + "_" + kwargs['exp_name']
    os.makedirs(exp_dir)
    # Persist the experiment configuration alongside its outputs
    with open(exp_dir + '/params.json', 'w') as f:
        json.dump(kwargs, f, indent=2, sort_keys=True, cls=ClassEncoder)
    set_seed(kwargs['seed'])

    # Instantiate classes

    t0 = time.time()
    run_1d_experiment(
        opt_type=kwargs['opt_type'],
        num_meta_tasks=kwargs['num_meta_tasks'],
        horizon=kwargs['horizon'],
        num_samples=kwargs['num_samples'],
        num_itr=kwargs['num_itr'],
        lr=kwargs['lr'],
        inner_lr=kwargs['inner_lr'],
        init_state_std=kwargs['init_state_std'],
        exp_dir=exp_dir,
    )

    print("time taken:  ", time.time() - t0)
Example #2
def main(config):
    set_seed(config['seed'])
    tf.compat.v1.disable_eager_execution()
    physical_devices = tf.config.list_physical_devices('GPU')
    # Allocate GPU memory on demand instead of reserving it all up front
    for gpu in physical_devices:
        tf.config.experimental.set_memory_growth(gpu, True)

    baseline = LinearFeatureBaseline()
    env = normalize(ENV_DICT[config['env']]())

    policy = MetaGaussianMLPPolicy(
        name="meta-policy",
        obs_dim=np.prod(env.observation_space.shape),
        action_dim=np.prod(env.action_space.shape),
        meta_batch_size=config['meta_batch_size'],
        hidden_sizes=config['hidden_sizes'],
    )

    sampler = MAMLSampler(
        env=env,
        policy=policy,
        rollouts_per_meta_task=config['rollouts_per_meta_task'],  # This batch_size is confusing
        meta_batch_size=config['meta_batch_size'],
        max_path_length=config['max_path_length'],
        parallel=config['parallel'],
    )

    sample_processor = MAMLSampleProcessor(
        baseline=baseline,
        discount=config['discount'],
        gae_lambda=config['gae_lambda'],
        normalize_adv=config['normalize_adv'],
        positive_adv=config['positive_adv'],
    )

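    # `args` below is presumably a module-level argparse.Namespace parsed at script start;
    # it is not passed into main() in this snippet.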
    Algo = VPGSGMRL if args.algo == 'sgmrl' else VPGMAML
    algo = Algo(policy=policy,
                inner_type=config['inner_type'],
                meta_batch_size=config['meta_batch_size'],
                num_inner_grad_steps=config['num_inner_grad_steps'],
                inner_lr=config['inner_lr'],
                learning_rate=config['learning_rate'],
                exploration=(args.algo == 'emaml'))

    trainer = Trainer(algo=algo,
                      policy=policy,
                      env=env,
                      sampler=sampler,
                      sample_processor=sample_processor,
                      n_itr=config['n_itr'],
                      num_inner_grad_steps=config['num_inner_grad_steps'])

    tester = Tester(algo=algo,
                    policy=policy,
                    env=env,
                    sampler=sampler,
                    sample_processor=sample_processor,
                    n_itr=50,
                    num_inner_grad_steps=config['num_inner_grad_steps'])

    best_itr = trainer.train(tester)
    print(best_itr)
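
A hypothetical entry point for this script is sketched below. The key names match the config lookups in main() and the args.algo check, but the concrete values (and the ENV_DICT key) are placeholders rather than the original defaults.

# Hypothetical entry point; every value here is a placeholder
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--algo', default='maml', choices=['maml', 'sgmrl', 'emaml'])
    args = parser.parse_args()

    main({
        'seed': 1,
        'env': 'half_cheetah_rand_direc',  # must be a key of ENV_DICT
        'meta_batch_size': 40,
        'hidden_sizes': (64, 64),
        'rollouts_per_meta_task': 20,
        'max_path_length': 100,
        'parallel': True,
        'discount': 0.99,
        'gae_lambda': 1.0,
        'normalize_adv': True,
        'positive_adv': False,
        'inner_type': 'likelihood_ratio',
        'num_inner_grad_steps': 1,
        'inner_lr': 0.1,
        'learning_rate': 1e-3,
        'n_itr': 500,
    })
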
Example #3
def run_experiment(**kwargs):
    exp_dir = os.getcwd() + '/data/' + EXP_NAME
    logger.configure(dir=exp_dir,
                     format_strs=['stdout', 'log', 'csv'],
                     snapshot_mode='last_gap',
                     snapshot_gap=50)
    with open(exp_dir + '/params.json', 'w') as f:
        json.dump(kwargs, f, indent=2, sort_keys=True, cls=ClassEncoder)

    # Instantiate classes
    set_seed(kwargs['seed'])

    baseline = kwargs['baseline']()

    env = normalize(kwargs['env']())  # Wrappers?

    policy = MetaGaussianMLPPolicy(
        name="meta-policy",
        obs_dim=np.prod(env.observation_space.shape),  # Todo...?
        action_dim=np.prod(env.action_space.shape),
        meta_batch_size=kwargs['meta_batch_size'],
        hidden_sizes=kwargs['hidden_sizes'],
        learn_std=kwargs['learn_std'],
        hidden_nonlinearity=kwargs['hidden_nonlinearity'],
        output_nonlinearity=kwargs['output_nonlinearity'],
    )

    # Load policy here

    sampler = MAMLSampler(
        env=env,
        policy=policy,
        rollouts_per_meta_task=kwargs['rollouts_per_meta_task'],
        meta_batch_size=kwargs['meta_batch_size'],
        max_path_length=kwargs['max_path_length'],
        parallel=kwargs['parallel'],
        envs_per_task=1,
    )

    sample_processor = MAMLSampleProcessor(
        baseline=baseline,
        discount=kwargs['discount'],
        gae_lambda=kwargs['gae_lambda'],
        normalize_adv=kwargs['normalize_adv'],
        positive_adv=kwargs['positive_adv'],
    )

    algo = ProMP(
        policy=policy,
        inner_lr=kwargs['inner_lr'],
        meta_batch_size=kwargs['meta_batch_size'],
        num_inner_grad_steps=kwargs['num_inner_grad_steps'],
        learning_rate=kwargs['learning_rate'],
        num_ppo_steps=kwargs['num_ppo_steps'],
        num_minibatches=kwargs['num_minibatches'],
        clip_eps=kwargs['clip_eps'],
        clip_outer=kwargs['clip_outer'],
        target_outer_step=kwargs['target_outer_step'],
        target_inner_step=kwargs['target_inner_step'],
        init_outer_kl_penalty=kwargs['init_outer_kl_penalty'],
        init_inner_kl_penalty=kwargs['init_inner_kl_penalty'],
        adaptive_outer_kl_penalty=kwargs['adaptive_outer_kl_penalty'],
        adaptive_inner_kl_penalty=kwargs['adaptive_inner_kl_penalty'],
        anneal_factor=kwargs['anneal_factor'],
    )

    trainer = Trainer(
        algo=algo,
        policy=policy,
        env=env,
        sampler=sampler,
        sample_processor=sample_processor,
        n_itr=kwargs['n_itr'],
        num_inner_grad_steps=kwargs['num_inner_grad_steps'],
    )

    trainer.train()
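
Unlike the config-dict variant above, this version receives classes and callables directly: baseline and env are uninstantiated classes, and the nonlinearities are TF functions. A hypothetical call is sketched below; LinearFeatureBaseline and HalfCheetahRandDirecEnv stand in for whatever baseline and env classes the surrounding script imports, and every numeric value is a placeholder rather than a tuned ProMP hyperparameter.

# Hypothetical invocation; class- and function-valued entries are passed uncalled,
# and every numeric value is a placeholder
run_experiment(
    seed=1,
    baseline=LinearFeatureBaseline,   # instantiated inside run_experiment
    env=HalfCheetahRandDirecEnv,      # wrapped by normalize() inside run_experiment
    meta_batch_size=40,
    hidden_sizes=(64, 64),
    learn_std=True,
    hidden_nonlinearity=tf.tanh,
    output_nonlinearity=None,
    rollouts_per_meta_task=20,
    max_path_length=100,
    parallel=True,
    discount=0.99,
    gae_lambda=1.0,
    normalize_adv=True,
    positive_adv=False,
    inner_lr=0.1,
    num_inner_grad_steps=1,
    learning_rate=1e-3,
    num_ppo_steps=5,
    num_minibatches=1,
    clip_eps=0.3,
    clip_outer=True,
    target_outer_step=0.0,
    target_inner_step=0.01,
    init_outer_kl_penalty=0.0,
    init_inner_kl_penalty=1e-2,
    adaptive_outer_kl_penalty=False,
    adaptive_inner_kl_penalty=False,
    anneal_factor=1.0,
    n_itr=1000,
)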