Exemplo n.º 1
0
def start_a3c(cluster, role, task_index):
    """Run this process's share of a distributed training job.

    A ``tf.train.Server`` is created for the given job name and task index.
    When ``role`` is ``'ps'`` the function logs a message and joins the
    server. For any other role it builds a CartPole environment, a session
    bound to the server target and a ``PPO.Agent`` -- all inside a
    ``tf.device`` scope driven by ``tf.train.replica_device_setter`` -- and
    passes the environment and agent to ``start_game``.

    Args:
        cluster: Cluster description forwarded to ``tf.train.Server`` and
            ``tf.train.replica_device_setter``.
        role: Job name of this process; ``'ps'`` selects the server-only path.
        task_index: Index of this task within its job; also used to build
            the worker device string.
    """
    server = tf.train.Server(cluster, job_name=role, task_index=task_index)

    # Guard clause: the 'ps' role only hosts the server and never builds
    # an environment or agent.
    if role == 'ps':
        logging.warning('Parameter server started.')
        server.join()
        return

    worker_device = "/job:worker/task:{}".format(task_index)
    device_setter = tf.train.replica_device_setter(
        worker_device=worker_device, cluster=cluster)

    with tf.device(device_setter):
        # Environment: seeded, then stripped of its wrappers.
        env = gym.make('CartPole-v0')
        env.seed(1)
        env = env.unwrapped

        # Session connected to this task's in-process server.
        session = tf.Session(server.target)

        # Agent configuration is passed as keyword options.
        action_count = env.action_space.n
        observation_dim = env.observation_space.shape[0]
        agent_options = {
            KEY_SESSION: session,
            KEY_MODEL_NAME: 'PPO',
            KEY_TRAIN_EPISODE: 1000,
        }
        agent = PPO.Agent(action_count, observation_dim, **agent_options)

        start_game(env, agent)
Exemplo n.º 2
0
def main(_):
    """Build a seeded CartPole environment and an agent, then start the game.

    Args:
        _: Ignored positional argument.
    """
    # Environment: seeded, then stripped of its wrappers.
    env = gym.make('CartPole-v0')
    env.seed(1)
    env = env.unwrapped

    # Agent sized from the environment's action and observation spaces.
    action_count = env.action_space.n
    observation_dim = env.observation_space.shape[0]
    agent_options = {
        KEY_MODEL_NAME: 'PolicyGradient',
        KEY_TRAIN_EPISODE: 10000,
    }
    agent = Agent(action_count, observation_dim, **agent_options)

    start_game(env, agent)