Пример #1
0
def learn_on_ppo1(env_name,
                  num_timesteps,
                  seed,
                  model_path,
                  lognum,
                  old_model_path=None):
    """Run ``my_pposgd_simple.learn`` on a gym environment and save the session.

    Steps performed, in order:

    1. Configure ``logger`` to write into ``<cwd>/../../logs/<lognum>``
       (stdout, log, csv, json and tensorboard formats).
    2. Create a session via ``U.make_session(num_cpu=1)`` and enter it.
       The session is NOT exited by this function.
    3. Seed via ``set_global_seeds(seed)``, create the environment with
       ``gym.make(env_name)`` and wrap it in ``bench.Monitor``.
    4. If ``old_model_path`` is given, build a ``CnnPolicy`` named ``'pi'``
       and restore variables from ``old_model_path`` into the default session.
    5. Call ``my_pposgd_simple.learn`` with fixed hyperparameters.
    6. Save the session to ``model_path`` with a fresh ``tf.train.Saver`` and
       print a confirmation line.

    Args:
        env_name: Id passed to ``gym.make``.
        num_timesteps: Forwarded as ``max_timesteps`` to the learner.
        seed: Value passed to ``set_global_seeds``.
        model_path: Destination passed to ``Saver.save``; concatenated to a
            ``str`` for the confirmation message.
        lognum: Name of the log sub-directory (converted with ``str``).
        old_model_path: Optional checkpoint path to restore before learning.

    Returns:
        None.
    """
    # Logs can be inspected with: tensorboard --logdir=<the directory below>
    project_root = Path.cwd().parent.parent
    logger.configure(
        dir=str(project_root / 'logs' / str(lognum)),
        format_strs=['stdout', 'log', 'csv', 'json', 'tensorboard'])

    # Enter the session manually; it is left active when this function returns.
    session = U.make_session(num_cpu=1)
    session.__enter__()
    set_global_seeds(seed)

    raw_env = gym.make(env_name)
    monitor_target = os.path.join(logger.get_dir(), "monitor.json")
    env = bench.Monitor(raw_env, monitor_target)

    if old_model_path is None:
        pi = None
    else:
        pi = cnn_policy.CnnPolicy('pi', env.observation_space,
                                  env.action_space)
        # TODO: confirm that restoring here loads the old model so that
        # learning continues from it.
        restorer = tf.train.Saver()
        restorer.restore(tf.get_default_session(), old_model_path)

    learn_kwargs = dict(
        policy_fn=policy_fn_cnn,
        max_timesteps=num_timesteps,
        timesteps_per_actorbatch=2048,  # TODO or 4096 when obstacles
        clip_param=0.2,
        entcoeff=0.0,
        optim_epochs=10,
        optim_stepsize=3e-4,
        optim_batchsize=64,
        gamma=0.99,
        lam=0.95,
        schedule='linear',
        callback=callback,
        pi=pi,
    )
    my_pposgd_simple.learn(env, **learn_kwargs)

    # NOTE: the environment is not closed here (the env.close() call was
    # disabled in the original code).
    tf.train.Saver().save(session, model_path)
    print('saved to ' + model_path)
Пример #2
0
 def policy_fn(name, ob_space, ac_space):  #pylint: disable=W0613
     """Return a ``cnn_policy.CnnPolicy`` built from ``name`` and the two spaces."""
     policy = cnn_policy.CnnPolicy(
         name=name, ob_space=ob_space, ac_space=ac_space)
     return policy
Пример #3
0
def policy_fn_cnn(name, ob_space, ac_space):
    """Return a ``cnn_policy.CnnPolicy`` constructed with ``kind='small'``.

    ``name``, ``ob_space`` and ``ac_space`` are forwarded unchanged as
    keyword arguments.
    """
    # TODO kind='large'
    ctor_kwargs = dict(
        name=name,
        ob_space=ob_space,
        ac_space=ac_space,
        kind='small',
    )
    return cnn_policy.CnnPolicy(**ctor_kwargs)
Пример #4
0
 def policy_fn(name, ob_space, ac_space):
     """Create a ``cnn_policy.CnnPolicy`` from the given name and spaces."""
     spaces = {'ob_space': ob_space, 'ac_space': ac_space}
     return cnn_policy.CnnPolicy(name=name, **spaces)
Пример #5
0
 def policy_fn(name, ob_space, ac_space, sess=None, placeholders=None):  # pylint: disable=W0613
     """Create a ``cnn_policy.CnnPolicy``, forwarding every argument by keyword.

     ``sess`` and ``placeholders`` default to ``None`` and are passed through
     to the constructor as-is.
     """
     return cnn_policy.CnnPolicy(
         name=name,
         ob_space=ob_space,
         ac_space=ac_space,
         sess=sess,
         placeholders=placeholders,
     )
Пример #6
0
 def policy_fn(name, ob_space, ac_space):
     """Construct and return a ``cnn_policy.CnnPolicy`` for the given spaces.

     This is a policy factory: it returns the policy object itself, not an
     action.
     """
     make_policy = cnn_policy.CnnPolicy
     return make_policy(name=name, ob_space=ob_space, ac_space=ac_space)