import tensorflow as tf

from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
from rllab.envs.gym_env import GymEnv
from sandbox.rocky.tf.envs.base import TfEnv
from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy

from inverse_rl.algos.irl_trpo import IRLTRPO
from inverse_rl.models.imitation_learning import AIRLStateAction
from inverse_rl.utils.log_utils import rllab_logdir, load_latest_experts


# Variant 1 (standalone script): Pendulum AIRL with learned-reward evaluation
# and figure output enabled.
def main(eval_reward=False):
    env = TfEnv(GymEnv('Pendulum-v0', record_video=False, record_log=False))
    n_experts = 10
    experts = load_latest_experts('plotting/pendulum_final', n=n_experts)
    dirname = 'data/pendulum'  # directory for logs and images

    irl_model = AIRLStateAction(env_spec=env.spec, expert_trajs=experts)
    policy = GaussianMLPPolicy(name='policy', env_spec=env.spec, hidden_sizes=(32, 32))
    algo = IRLTRPO(
        env=env,
        policy=policy,
        irl_model=irl_model,
        n_itr=1000,
        batch_size=1000,
        max_path_length=100,
        discount=0.99,
        store_paths=True,
        discrim_train_itrs=50,
        irl_model_wt=1.0,
        entropy_weight=0.1,  # this should be 1.0 but 0.1 seems to work better
        zero_environment_reward=True,
        baseline=LinearFeatureBaseline(env_spec=env.spec),
        eval_reward=eval_reward,  # was hardcoded to True, ignoring the argument
        fig_dir=dirname,
    )
    with rllab_logdir(algo=algo, dirname=dirname):
        with tf.Session():
            algo.fig_dirname = dirname
            algo.train()
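
# Hypothetical entry point (not in the original script): a minimal sketch of how
# this variant might be invoked when run standalone, preserving the original's
# eval_reward=True behavior.
if __name__ == "__main__":
    main(eval_reward=True)
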
# Variant 2 (standalone script): basic Pendulum AIRL run without reward
# evaluation, using experts logged under data/pendulum.
def main():
    env = TfEnv(GymEnv('Pendulum-v0', record_video=False, record_log=False))
    experts = load_latest_experts('data/pendulum', n=5)

    irl_model = AIRLStateAction(env_spec=env.spec, expert_trajs=experts)
    policy = GaussianMLPPolicy(name='policy', env_spec=env.spec, hidden_sizes=(32, 32))
    algo = IRLTRPO(
        env=env,
        policy=policy,
        irl_model=irl_model,
        n_itr=200,
        batch_size=1000,
        max_path_length=100,
        discount=0.99,
        store_paths=True,
        discrim_train_itrs=50,
        irl_model_wt=1.0,
        entropy_weight=0.1,  # this should be 1.0 but 0.1 seems to work better
        zero_environment_reward=True,
        baseline=LinearFeatureBaseline(env_spec=env.spec),
    )
    with rllab_logdir(algo=algo, dirname='data/pendulum_gcl'):
        with tf.Session():
            algo.train()
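
# A minimal sketch (an assumption, not part of the original file) of how the
# expert demonstrations consumed by load_latest_experts('data/pendulum', n=5)
# could be produced: train a TRPO expert on the same environment with
# store_paths=True, so each itr_*.pkl snapshot under data/pendulum contains
# sampled trajectories. The function name collect_pendulum_experts is
# hypothetical.
from sandbox.rocky.tf.algos.trpo import TRPO


def collect_pendulum_experts():
    env = TfEnv(GymEnv('Pendulum-v0', record_video=False, record_log=False))
    policy = GaussianMLPPolicy(name='expert_policy', env_spec=env.spec, hidden_sizes=(32, 32))
    algo = TRPO(
        env=env,
        policy=policy,
        n_itr=200,
        batch_size=1000,
        max_path_length=100,
        discount=0.99,
        store_paths=True,  # saves sampled paths into each snapshot
        baseline=LinearFeatureBaseline(env_spec=env.spec),
    )
    with rllab_logdir(algo=algo, dirname='data/pendulum'):
        with tf.Session():
            algo.train()
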
# Variant 3 (standalone script): Swimmer AIRL run with configurable GPU
# visibility and discount factor.
def main(exp_name=None, fusion=False, visible_gpus='0', discount=0.99):
    env = TfEnv(GymEnv('Swimmer-v3', record_video=False, record_log=False))
    # `args` was undefined here; use the function's visible_gpus argument instead.
    gpu_options = tf.GPUOptions(allow_growth=True, visible_device_list=visible_gpus)
    tf_config = tf.ConfigProto(
        inter_op_parallelism_threads=1,
        intra_op_parallelism_threads=1,
        gpu_options=gpu_options,
    )
    experts = load_latest_experts('data/swimmer', n=5, visible_gpus=visible_gpus)

    irl_model = AIRLStateAction(discount=discount, env_spec=env.spec, expert_trajs=experts)
    policy = GaussianMLPPolicy(name='policy', env_spec=env.spec, hidden_sizes=(32, 32))
    algo = IRLTRPO(
        env=env,
        policy=policy,
        irl_model=irl_model,
        n_itr=1000,
        batch_size=10000,
        max_path_length=1000,
        discount=discount,
        store_paths=True,
        discrim_train_itrs=50,
        irl_model_wt=1.0,
        entropy_weight=0.1,  # this should be 1.0 but 0.1 seems to work better
        zero_environment_reward=True,
        baseline=LinearFeatureBaseline(env_spec=env.spec),
    )
    with rllab_logdir(algo=algo, dirname='data/swimmer_airl_no_disent'):
        with tf.Session(config=tf_config) as sess:
            algo.train(sess)
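
# Hypothetical command-line entry point: the original's reference to
# args.visible_gpus suggests this script was once driven by argparse. The flag
# names below are assumptions, not confirmed by the source.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--visible_gpus', type=str, default='0')
    parser.add_argument('--discount', type=float, default=0.99)
    args = parser.parse_args()
    main(visible_gpus=args.visible_gpus, discount=args.discount)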