Example #1
def experiment(variant):

    seed = variant['seed']
    n_parallel = variant['n_parallel']
    log_dir = variant['log_dir']
    setup(seed, n_parallel, log_dir)

    fast_learning_rate = variant['flr']

    fast_batch_size = variant[
        'fbs']  # 10 works for [0.1, 0.2], 20 doesn't improve much for [0,0.2]
    meta_batch_size = 20  # 10 also works, but much less stable, 20 is fairly stable, 40 is more stable
    max_path_length = 100
    num_grad_updates = 1
    meta_step_size = variant['mlr']

    env = TfEnv(normalize(PointEnvRandGoal()))

    policy = MAMLGaussianMLPPolicy(
        name="policy",
        env_spec=env.spec,
        grad_step_size=fast_learning_rate,
        hidden_nonlinearity=tf.nn.relu,
        hidden_sizes=variant['hidden_sizes'],
    )

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = MAMLTRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=fast_batch_size,  # number of trajs for grad update
        max_path_length=max_path_length,
        meta_batch_size=meta_batch_size,
        num_grad_updates=num_grad_updates,
        n_itr=1000,
        use_maml=True,
        step_size=meta_step_size,
        plot=False,
    )

    algo.train()
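
A minimal sketch of a variant dict that could drive experiment() above; the keys are exactly the ones the function reads, while every value (seed, path, rates) is an illustrative assumption rather than a setting from the source:

    variant = dict(
        seed=1,                      # passed to setup() together with n_parallel and log_dir
        n_parallel=4,                # number of sampler workers
        log_dir='/tmp/maml_point',   # placeholder log directory
        flr=0.1,                     # fast (inner-loop) learning rate for the MAML gradient step
        fbs=20,                      # fast batch size: trajectories per inner update
        mlr=0.01,                    # meta (outer-loop) TRPO step size
        hidden_sizes=(100, 100),     # policy MLP layer widths
    )
    experiment(variant)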
Example #2
def experiment(variant):

    fast_learning_rate = variant['fast_learning_rate']

    fast_batch_size = variant['fast_batch_size']
    meta_batch_size = variant['meta_batch_size']
    max_path_length = variant['max_path_length']
    num_grad_updates = variant['num_grad_updates']
    meta_step_size = variant['meta_step_size']

    env = TfEnv(BallEnv())

    policy = MAMLGaussianMLPPolicy(
        name="policy",
        env_spec=env.spec,
        grad_step_size=fast_learning_rate,
        hidden_nonlinearity=tf.nn.relu,
        hidden_sizes=(100, 100),
    )

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = MAMLTRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=fast_batch_size,  # number of trajs for grad update
        max_path_length=max_path_length,
        meta_batch_size=meta_batch_size,
        num_grad_updates=num_grad_updates,
        n_itr=1000,
        use_maml=True,
        step_size=meta_step_size,
        plot=False,
    )

    algo.train()
Example #3
def main():
  config = tf.ConfigProto()
  config.gpu_options.allow_growth = True # pylint: disable=E1101
  with tf.Session(config=config):
    # env = TfEnv(normalize(GridWorldEnvRand('four-state')))
    env = DummyVecEnv([make_env])
    policy = MAMLCategoricalMLPPolicy(
            name="policy",
            env_spec=env.spec,
            grad_step_size=fast_learning_rate,
            prob_network=nature_cnn,
            hidden_nonlinearity=tf.nn.relu,
            hidden_sizes=(100,100),
        )
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = MAMLTRPO(
            env=env,
            policy=policy,
            baseline=baseline,
            batch_size=fast_batch_size, # number of trajs for grad update
            max_path_length=max_path_length,
            meta_batch_size=meta_batch_size,
            num_grad_updates=num_grad_updates,
            n_itr=800,
            use_maml=use_maml,
            step_size=meta_step_size,
            plot=False,
        )
    run_experiment_lite(
            algo.train(),
            n_parallel=4,
            snapshot_mode="last",
            seed=1,
            exp_prefix='trpo_maml_4state',
            exp_name='trpo_maml'+str(int(use_maml))+'_fbs'+str(fast_batch_size)+'_mbs'+str(meta_batch_size)+'_flr_' + str(fast_learning_rate) + 'metalr_' + str(meta_step_size) +'_step1'+str(num_grad_updates),
            plot=False,
        )
Example #4
def experiment(variant):

    seed = variant['seed']
    n_parallel = variant['n_parallel']
    log_dir = variant['log_dir']

    setup(seed, n_parallel, log_dir)
    expertDataLoc = variant['expertDataLoc']
    expertDataItr = variant['expertDataItr']

    fast_learning_rate = variant['flr']

    fast_batch_size = variant[
        'fbs']  # 10 works for [0.1, 0.2], 20 doesn't improve much for [0,0.2]
    meta_batch_size = 20  # 10 also works, but much less stable, 20 is fairly stable, 40 is more stable
    max_path_length = 150
    num_grad_updates = 1
    meta_step_size = variant['mlr']

    regionSize = variant['regionSize']

    if regionSize == '20X20':
        tasksFile = '/root/code/multiworld/multiworld/envs/goals/pickPlace_20X20_v1.pkl'

    else:
        assert regionSize == '60X30'
        tasksFile = '/root/code/multiworld/multiworld/envs/goals/PickPlace_60X30.pkl'

    tasks = pickle.load(open(tasksFile, 'rb'))
    envType = variant['envType']

    if envType == 'Push':
        baseEnv = SawyerPushEnv(tasks=tasks)
    else:
        assert envType == 'PickPlace'

        baseEnv = SawyerPickPlaceEnv(tasks=tasks)

    env = FinnMamlEnv(FlatGoalEnv(baseEnv, obs_keys=['state_observation']))
    env = TfEnv(NormalizedBoxEnv(env))
    policy = MAMLGaussianMLPPolicy(
        name="policy",
        env_spec=env.spec,
        grad_step_size=fast_learning_rate,
        hidden_nonlinearity=tf.nn.relu,
        hidden_sizes=variant['hidden_sizes'],
    )

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = MAMLTRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=fast_batch_size,  # number of trajs for grad update
        max_path_length=max_path_length,
        meta_batch_size=meta_batch_size,
        num_grad_updates=num_grad_updates,
        n_itr=1000,
        use_maml=True,
        step_size=meta_step_size,
        plot=False,
        numExpertPolicies=20,
        expertDataInfo={
            'expert_loc': expertDataLoc,
            'expert_itr': expertDataItr
        })

    algo.train()
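
Relative to Example #1, this experiment also expects expert-data and environment-selection keys in the variant; a hedged sketch (paths, iteration numbers, and rates below are placeholders, not real settings):

    variant = dict(
        seed=1, n_parallel=4, log_dir='/tmp/maml_sawyer',
        flr=0.1, fbs=20, mlr=0.01,
        hidden_sizes=(100, 100),
        regionSize='20X20',                 # selects which task .pkl file is loaded
        envType='Push',                     # 'Push' or 'PickPlace'
        expertDataLoc='/path/to/experts/',  # placeholder: location of expert policy data
        expertDataItr=500,                  # placeholder: which expert snapshot iteration to use
    )
    experiment(variant)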
Example #5
            )
            if bas == 'zero':
                baseline = ZeroBaseline(env_spec=env.spec)
            elif 'linear' in bas:
                baseline = LinearFeatureBaseline(env_spec=env.spec)
            else:
                baseline = GaussianMLPBaseline(env_spec=env.spec)
            algo = MAMLTRPO(
                env=env,
                policy=policy,
                baseline=baseline,
                batch_size=fast_batch_size, # number of trajs for grad update
                max_path_length=max_path_length,
                meta_batch_size=meta_batch_size,
                num_grad_updates=num_grad_updates,
                n_itr=100,
                use_maml=use_maml,
                step_size=meta_step_size,
                plot=False,
            )
            run_experiment_lite(
                algo.train(),
                n_parallel=1,
                snapshot_mode="last",
                python_command='python3',
                seed=1,
                exp_prefix='vpg_maml_point100',
                exp_name='trpomaml'+str(int(use_maml))+'_fbs'+str(fast_batch_size)+'_mbs'+str(meta_batch_size)+'_flr_' + str(fast_learning_rate) + 'metalr_' + str(meta_step_size) +'_step1'+str(num_grad_updates),
                plot=False,
            )
Example #6
    algo = MAMLTRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=v['fast_batch_size'], # number of trajs for grad update
        max_path_length=max_path_length,
        meta_batch_size=v['meta_batch_size'],
        num_grad_updates=num_grad_updates,
        n_itr=800,
        use_maml=use_maml,
        step_size=v['meta_step_size'],
        plot=False,
    )

    run_experiment_lite(
        algo.train(frac=v.gpu_frac),
        exp_prefix='maml_mdp', 
        exp_name='N%d_mbs%d' % (v['n_episodes'], v['meta_batch_size']),
        # Number of parallel workers for sampling
        n_parallel=1,
        # Only keep the snapshot parameters for the last iteration
        snapshot_mode="gap",
        env={'CUDA_VISIBLE_DEVICES' : str(FLAGS.devices)},
        snapshot_gap=25,
        sync_s3_pkl=True,
        # Specifies the seed for the experiment. If this is not provided, a random seed
        # will be used
        seed=v["seed"],
        mode="local",
        #mode="ec2",
        variant=v,
    )
Example #7
def experiment(variant):

    seed = variant['seed']
    n_parallel = variant['n_parallel']
    log_dir = variant['log_dir']
    setup(seed, n_parallel, log_dir)

    fast_learning_rate = variant['flr']

    fast_batch_size = variant[
        'fbs']  # 10 works for [0.1, 0.2], 20 doesn't improve much for [0,0.2]
    meta_batch_size = 20  # 10 also works, but much less stable, 20 is fairly stable, 40 is more stable
    max_path_length = 150
    num_grad_updates = 1
    meta_step_size = variant['mlr']

    tasksFile = '/root/code/multiworld/multiworld/envs/goals/Door_60X20X20.pkl'

    tasks = pickle.load(open(tasksFile, 'rb'))

    baseEnv = SawyerDoorOpenEnv(tasks=tasks)

    env = FinnMamlEnv(FlatGoalEnv(baseEnv, obs_keys=['state_observation']))

    env = TfEnv(NormalizedBoxEnv(env))

    policy = MAMLGaussianMLPPolicy(
        name="policy",
        env_spec=env.spec,
        grad_step_size=fast_learning_rate,
        hidden_nonlinearity=tf.nn.relu,
        hidden_sizes=variant['hidden_sizes'],
    )

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = MAMLTRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=fast_batch_size,  # number of trajs for grad update
        max_path_length=max_path_length,
        meta_batch_size=meta_batch_size,
        num_grad_updates=num_grad_updates,
        n_itr=1000,
        use_maml=True,
        step_size=meta_step_size,
        plot=False,
    )

    # import os

    # saveDir = variant['saveDir']

    # if os.path.isdir(saveDir)==False:
    #     os.mkdir(saveDir)

    # logger.set_snapshot_dir(saveDir)
    # #logger.set_snapshot_gap(20)
    # logger.add_tabular_output(saveDir+'progress.csv')

    algo.train()
Example #8
def experiment(variant):

    seed = variant['seed']

    tf.set_random_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    fast_learning_rate = variant['flr']

    fast_batch_size = variant[
        'fbs']  # 10 works for [0.1, 0.2], 20 doesn't improve much for [0,0.2]
    meta_batch_size = 20  # 10 also works, but much less stable, 20 is fairly stable, 40 is more stable
    max_path_length = 150
    num_grad_updates = 1
    meta_step_size = variant['mlr']

    regionSize = variant['regionSize']

    if regionSize == '20X20':

        tasksFile = '/root/code/multiworld/multiworld/envs/goals/pickPlace_20X20_6_8.pkl'

    else:
        assert regionSize == '60X30'

        tasksFile = '/root/code/multiworld/multiworld/envs/goals/pickPlace_60X30.pkl'

    tasks = pickle.load(open(tasksFile, 'rb'))

    envType = variant['envType']

    if envType == 'Push':

        baseEnv = SawyerPushEnv(tasks=tasks)
    else:
        assert envType == 'PickPlace'

        baseEnv = SawyerPickPlaceEnv(tasks=tasks)
    env = FinnMamlEnv(
        FlatGoalEnv(baseEnv,
                    obs_keys=['state_observation', 'state_desired_goal']))

    env = TfEnv(NormalizedBoxEnv(env))

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = MAMLTRPO(
        env=env,
        policy=None,
        load_policy=variant['init_param_file'],
        baseline=baseline,
        batch_size=fast_batch_size,  # number of trajs for grad update
        max_path_length=max_path_length,
        meta_batch_size=meta_batch_size,
        num_grad_updates=num_grad_updates,
        n_itr=1000,
        use_maml=True,
        step_size=meta_step_size,
        plot=False,
    )

    import os

    saveDir = variant['saveDir']

    if not os.path.isdir(saveDir):
        os.mkdir(saveDir)

    logger.set_snapshot_dir(saveDir)
    logger.add_tabular_output(saveDir + 'progress.csv')

    algo.train()
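
This last example warm-starts MAMLTRPO from a saved policy (load_policy) instead of constructing one, and writes snapshots to saveDir; a sketch of the extra variant keys it reads, with placeholder file names and illustrative values:

    variant = dict(
        seed=1,
        flr=0.1, fbs=20, mlr=0.01,
        regionSize='20X20',
        envType='PickPlace',
        init_param_file='/path/to/itr_300.pkl',  # placeholder: pickled policy passed as load_policy
        saveDir='/tmp/maml_finetune/',           # snapshots and progress.csv are written here
    )
    experiment(variant)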