Example #1
# Imports below assume the rllab / inverse_rl project layout these examples target.
import tensorflow as tf

from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
from rllab.envs.gym_env import GymEnv
from sandbox.rocky.tf.envs.base import TfEnv
from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy

from inverse_rl.algos.irl_trpo import IRLTRPO
from inverse_rl.models.imitation_learning import AIRLStateAction
from inverse_rl.utils.log_utils import rllab_logdir, load_latest_experts


def main(eval_reward=False):
    env = TfEnv(GymEnv('Pendulum-v0', record_video=False, record_log=False))
    
    n_experts = 10
    experts = load_latest_experts('plotting/pendulum_final', n=n_experts)
    dirname = 'data/pendulum'  # directory to save logs and images

    irl_model = AIRLStateAction(env_spec=env.spec, expert_trajs=experts)
    policy = GaussianMLPPolicy(name='policy', env_spec=env.spec, hidden_sizes=(32, 32))
    algo = IRLTRPO(
        env=env,
        policy=policy,
        irl_model=irl_model,
        n_itr=1000,
        batch_size=1000,
        max_path_length=100,
        discount=0.99,
        store_paths=True,
        discrim_train_itrs=50,
        irl_model_wt=1.0,
        entropy_weight=0.1, # this should be 1.0 but 0.1 seems to work better
        zero_environment_reward=True,
        baseline=LinearFeatureBaseline(env_spec=env.spec),
        eval_reward=eval_reward,
        fig_dir=dirname
    )

    # with rllab_logdir(algo=algo, dirname='data/pendulum_gcl{}'.format(n_experts)):
    with rllab_logdir(algo=algo, dirname=dirname):
        with tf.Session():
            algo.fig_dirname = dirname
            algo.train()
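
These examples read pre-recorded expert rollouts with load_latest_experts, so a directory of saved trajectories (e.g. data/pendulum) has to exist before AIRL training starts. Below is a minimal sketch of how such demonstrations are typically collected with plain TRPO in the same rllab stack; the function name, output directory, and hyperparameters are illustrative assumptions rather than part of the listings on this page.

import tensorflow as tf

from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
from rllab.envs.gym_env import GymEnv
from sandbox.rocky.tf.algos.trpo import TRPO
from sandbox.rocky.tf.envs.base import TfEnv
from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy

from inverse_rl.utils.log_utils import rllab_logdir


def collect_pendulum_experts():
    env = TfEnv(GymEnv('Pendulum-v0', record_video=False, record_log=False))
    policy = GaussianMLPPolicy(name='expert_policy', env_spec=env.spec, hidden_sizes=(32, 32))
    algo = TRPO(
        env=env,
        policy=policy,
        n_itr=200,
        batch_size=1000,
        max_path_length=100,
        discount=0.99,
        store_paths=True,  # saved paths are what load_latest_experts reads back
        baseline=LinearFeatureBaseline(env_spec=env.spec),
    )
    # Logging into data/pendulum matches the directory queried by load_latest_experts.
    with rllab_logdir(algo=algo, dirname='data/pendulum'):
        with tf.Session():
            algo.train()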
Example #2
# Uses the same rllab / inverse_rl imports as Example #1.
def main():
    env = TfEnv(GymEnv('Pendulum-v0', record_video=False, record_log=False))

    experts = load_latest_experts('data/pendulum', n=5)

    irl_model = AIRLStateAction(env_spec=env.spec, expert_trajs=experts)
    policy = GaussianMLPPolicy(name='policy',
                               env_spec=env.spec,
                               hidden_sizes=(32, 32))
    algo = IRLTRPO(
        env=env,
        policy=policy,
        irl_model=irl_model,
        n_itr=200,
        batch_size=1000,
        max_path_length=100,
        discount=0.99,
        store_paths=True,
        discrim_train_itrs=50,
        irl_model_wt=1.0,
        entropy_weight=0.1,  # this should be 1.0 but 0.1 seems to work better
        zero_environment_reward=True,
        baseline=LinearFeatureBaseline(env_spec=env.spec))

    with rllab_logdir(algo=algo, dirname='data/pendulum_gcl'):
        with tf.Session():
            algo.train()
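
Neither pendulum example defines a script entry point; when these mains are saved as standalone files, the usual guard (assumed here, not shown in the originals) runs them:

if __name__ == '__main__':
    main()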
Example #3
# Uses the same rllab / inverse_rl imports as Example #1.
def main(exp_name=None, fusion=False, visible_gpus='0', discount=0.99):
    env = TfEnv(GymEnv('Swimmer-v3', record_video=False, record_log=False))

    gpu_options = tf.GPUOptions(allow_growth=True,
                                visible_device_list=visible_gpus)
    tf_config = tf.ConfigProto(inter_op_parallelism_threads=1,
                               intra_op_parallelism_threads=1,
                               gpu_options=gpu_options)

    experts = load_latest_experts('data/swimmer',
                                  n=5,
                                  visible_gpus=visible_gpus)

    irl_model = AIRLStateAction(discount=discount,
                                env_spec=env.spec,
                                expert_trajs=experts)
    policy = GaussianMLPPolicy(name='policy',
                               env_spec=env.spec,
                               hidden_sizes=(32, 32))
    algo = IRLTRPO(
        env=env,
        policy=policy,
        irl_model=irl_model,
        n_itr=1000,
        batch_size=10000,
        max_path_length=1000,
        discount=discount,
        store_paths=True,
        discrim_train_itrs=50,
        irl_model_wt=1.0,
        entropy_weight=0.1,  # this should be 1.0 but 0.1 seems to work better
        zero_environment_reward=True,
        baseline=LinearFeatureBaseline(env_spec=env.spec))

    with rllab_logdir(algo=algo, dirname='data/swimmer_airl_no_disent'):
        with tf.Session(config=tf_config) as sess:
            algo.train(sess)
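
The swimmer example's main takes command-line-style keyword arguments (exp_name, fusion, visible_gpus, discount), which suggests it is normally driven by a command-line wrapper. A hypothetical argparse entry point matching that signature, assuming it lives in the same file as the swimmer main above, could look like this:

import argparse

if __name__ == '__main__':
    # Hypothetical wrapper; the flag names simply mirror main()'s keyword arguments.
    parser = argparse.ArgumentParser()
    parser.add_argument('--exp_name', type=str, default=None)
    parser.add_argument('--fusion', action='store_true')
    parser.add_argument('--visible_gpus', type=str, default='0')
    parser.add_argument('--discount', type=float, default=0.99)
    args = parser.parse_args()
    main(exp_name=args.exp_name, fusion=args.fusion,
         visible_gpus=args.visible_gpus, discount=args.discount)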