Пример #1
0
     hidden_nonlinearity=tf.nn.relu,
     hidden_sizes=(100, 100),
 )
 # Choose the baseline class from the `bas` setting: 'zero' and 'linear' are
 # matched exactly; any other value falls through to the Gaussian MLP baseline.
 baseline_cls = (
     ZeroBaseline if bas == 'zero'
     else LinearFeatureBaseline if bas == 'linear'
     else GaussianMLPBaseline
 )
 # Every baseline is constructed the same way, from the environment's spec.
 baseline = baseline_cls(env_spec=env.spec)
 # Keyword settings for the SensitiveTRPO run, collected up front so the
 # constructor call itself stays a one-liner.
 # NOTE(review): the original carried a commented-out `algo = SensitiveVPG(`
 # line and a commented-out `optimizer_args` override (tf_optimizer_args with
 # a `learning_rate`); presumably left over from a VPG variant -- confirm
 # before reviving either.
 trpo_settings = {
     'env': env,
     'policy': policy,
     'baseline': baseline,
     'batch_size': fast_batch_size,  # number of trajs for grad update
     'max_path_length': max_path_length,
     'meta_batch_size': meta_batch_size,
     'num_grad_updates': num_grad_updates,
     'n_itr': 400,
     'use_sensitive': use_sensitive,
     'plot': False,
 }
 algo = SensitiveTRPO(**trpo_settings)
 run_experiment_lite(
     algo.train(),
     n_parallel=0,
     snapshot_mode="last",
     seed=1,
     #exp_prefix='deleteme',
     #exp_name='deleteme'
     #exp_prefix='sensitive1dT5_2017_01_19',
     #exp_prefix='bugfix_sensitive0d_8tasks_T'+str(max_path_length)+'_2017_02_05',
Пример #2
0
     hidden_nonlinearity=tf.nn.relu,
     hidden_sizes=(100, 100),
 )
 # Choose the baseline class from the `bas` setting: exactly 'zero' selects
 # the zero baseline, any value containing 'linear' selects the linear-feature
 # baseline, and everything else falls through to the Gaussian MLP baseline.
 baseline_cls = (
     ZeroBaseline if bas == 'zero'
     else LinearFeatureBaseline if 'linear' in bas
     else GaussianMLPBaseline
 )
 # Every baseline is constructed the same way, from the environment's spec.
 baseline = baseline_cls(env_spec=env.spec)
 # Keyword settings for the SensitiveTRPO run, collected up front so the
 # constructor call itself stays a one-liner.
 algo_kwargs = {
     'env': env,
     'policy': policy,
     'baseline': baseline,
     'batch_size': fast_batch_size,  # number of trajs for grad update
     'max_path_length': max_path_length,
     'meta_batch_size': meta_batch_size,
     'num_grad_updates': num_grad_updates,
     'n_itr': 100,
     'use_sensitive': use_sensitive,
     'step_size': meta_step_size,
     'plot': False,
 }
 algo = SensitiveTRPO(**algo_kwargs)
 run_experiment_lite(
     algo.train(),
     n_parallel=40,
     snapshot_mode="last",
     python_command='python3',
     seed=1,
     exp_prefix='vpg_sensitive_point100',
     exp_name='trposens' + str(int(use_sensitive)) + '_fbs' +
     str(fast_batch_size) + '_mbs' + str(meta_batch_size) +