示例#1
0
# Capture the logger's current snapshot directory and mode before they are
# overridden below.
# NOTE(review): presumably these are restored once training finishes; the
# restore is not visible in this excerpt -- confirm.
prev_snapshot_dir = logger.get_snapshot_dir()
prev_mode = logger.get_snapshot_mode()
# Point snapshots at this run's log_dir.
logger.set_snapshot_dir(log_dir)
# NOTE(review): "gaplast" combined with a gap of 100 presumably keeps a
# snapshot every 100 iterations plus the most recent one -- confirm against
# the logger implementation.
logger.set_snapshot_mode("gaplast")
logger.set_snapshot_gap(100)
logger.set_log_tabular_only(False)
# Push the "[FixMapStartState] " prefix onto the logger (presumably prepended
# to subsequent log output -- confirm).
logger.push_prefix("[%s] " % "FixMapStartState")

# Set up the project's parallel sampler with n_parallel=1 and a fixed seed of
# 0. NOTE(review): presumably n_parallel is the number of sampling workers and
# the seed makes rollouts reproducible -- confirm in Algo.parallel_sampler.
from Algo import parallel_sampler
parallel_sampler.initialize(n_parallel=1)
parallel_sampler.set_seed(0)

# Convolutional categorical policy built from the environment spec.
# All four conv entries share filter value 3, stride 1 and 'SAME' padding;
# only the second filter size differs (5 instead of 3).
# NOTE(review): presumably each list position configures one conv layer --
# confirm against CategoricalConvPolicy.
policy = CategoricalConvPolicy(
    name="ConvNet",
    env_spec=env.spec,
    conv_filters=[3] * 4,
    conv_filter_sizes=[3, 5, 3, 3],
    conv_strides=[1] * 4,
    conv_pads=['SAME'] * 4,
)

# Baseline constructed from the same environment spec as the policy.
baseline = LinearFeatureBaseline(
    env_spec=env.spec,
)

with tf.Session() as sess:

    algo = VPG_t(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=2048,
        max_path_length=env._wrapped_env.params['traj_limit'],
        n_itr=20000,