Example #1
def agent_worker(config, policy, learner_w_queue, global_episode, n_agent,
                 log_dir, training_on, replay_queue, update_step):
    # Entry point for one actor process: build an Agent and drive its
    # rollout loop. The agent feeds transitions into replay_queue and
    # picks up fresh policy weights from learner_w_queue while the
    # shared training_on flag is set.
    agent = Agent(config,
                  policy,
                  global_episode=global_episode,
                  n_agent=n_agent,
                  log_dir=log_dir)
    agent.run(training_on, replay_queue, learner_w_queue, update_step)
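For context, a minimal sketch of how a caller might fan several of these workers out with torch.multiprocessing; the worker count, queue sizes, and shared-counter types below are assumptions, not part of the example.

import torch.multiprocessing as mp

def spawn_agents(config, policy, log_dir, n_agents=4):
    # Shared primitives the worker expects: a queue of transitions for the
    # learner, a queue of fresh weights from it, and shared counters/flags.
    replay_queue = mp.Queue(maxsize=64)
    learner_w_queue = mp.Queue(maxsize=n_agents)
    global_episode = mp.Value('i', 0)
    training_on = mp.Value('i', 1)
    update_step = mp.Value('i', 0)

    processes = []
    for n_agent in range(n_agents):
        p = mp.Process(target=agent_worker,
                       args=(config, policy, learner_w_queue, global_episode,
                             n_agent, log_dir, training_on, replay_queue,
                             update_step))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()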
Example #2
def agent_worker(config, policy, learner_w_queue, global_episode, i, agent_type,
                 experiment_dir, training_on, replay_queue, update_step):
    # Same pattern as Example #1, but an agent_type tag (e.g. an exploring
    # vs. an evaluating agent) is threaded through to the Agent as well.
    agent = Agent(config,
                  policy=policy,
                  global_episode=global_episode,
                  n_agent=i,
                  agent_type=agent_type,
                  log_dir=experiment_dir)
    agent.run(training_on, replay_queue, learner_w_queue, update_step)
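For this variant, a caller might mix one evaluating agent with several explorers. A sketch under that assumption; the 'exploration'/'exploitation' strings and the shape of the shared objects are guesses, not taken from the original code.

import torch.multiprocessing as mp

def spawn_mixed_agents(config, policy, experiment_dir, shared, n_explorers=3):
    # shared bundles the same queues, counters, and flags as in the
    # sketch after Example #1.
    replay_queue, learner_w_queue, global_episode, training_on, update_step = shared
    processes = []
    for i, agent_type in enumerate(['exploitation'] + ['exploration'] * n_explorers):
        p = mp.Process(target=agent_worker,
                       args=(config, policy, learner_w_queue, global_episode,
                             i, agent_type, experiment_dir, training_on,
                             replay_queue, update_step))
        p.start()
        processes.append(p)
    return processes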
Example #3
def main():
    # gym, random, np, Agent, preprocess, and FLAGS are imported or
    # defined elsewhere in the source module.
    env = gym.make(FLAGS.env_name)
    agent = Agent(num_actions=env.action_space.n, config=FLAGS)

    if FLAGS.train:  # Train mode
        for _ in range(FLAGS.num_episodes):
            terminal = False
            observation = env.reset()
            # Start each episode with a random number of no-op steps so the
            # agent sees varied initial states.
            for _ in range(random.randint(1, FLAGS.no_op_steps)):
                last_observation = observation
                observation, _, _, _ = env.step(0)  # Do nothing
            state = agent.get_initial_state(observation, last_observation)
            while not terminal:
                last_observation = observation
                action = agent.get_action(state)
                observation, reward, terminal, _ = env.step(action)
                # env.render()
                processed_observation = preprocess(observation, last_observation)
                # agent.run advances the agent one step (replay storage and
                # learning happen inside) and returns the next stacked state.
                state = agent.run(state, action, reward, terminal, processed_observation)
    else:  # Test mode
        # env.monitor.start(ENV_NAME + '-test')
        for _ in range(FLAGS.num_episodes_at_test):
            terminal = False
            observation = env.reset()
            for _ in range(random.randint(1, FLAGS.no_op_steps)):
                last_observation = observation
                observation, _, _, _ = env.step(0)  # Do nothing
            state = agent.get_initial_state(observation, last_observation)
            while not terminal:
                last_observation = observation
                action = agent.get_action_at_test(state)
                observation, _, terminal, _ = env.step(action)
                env.render()
                processed_observation = preprocess(observation, last_observation)
                # Slide the frame stack: drop the oldest frame, append the newest.
                state = np.append(state[1:, :, :], processed_observation, axis=0)
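The loop relies on preprocess and agent.get_initial_state, which the example does not show. A minimal sketch under the usual DQN-for-Atari conventions (per-pixel max over consecutive frames to remove flicker, 84x84 grayscale, a stack of four frames); the constants, the PIL dependency, and the free-function form of get_initial_state are assumptions.

import numpy as np
from PIL import Image

FRAME_WIDTH, FRAME_HEIGHT = 84, 84  # network input size (assumption)
STATE_LENGTH = 4                    # stacked frames per state (assumption)

def preprocess(observation, last_observation):
    # Per-pixel max over the two raw frames removes Atari sprite flicker.
    frame = np.maximum(observation, last_observation)
    img = Image.fromarray(frame).convert('L').resize((FRAME_WIDTH, FRAME_HEIGHT))
    # Shape (1, H, W) so it can be appended along the stack axis.
    return np.asarray(img, dtype=np.uint8).reshape(1, FRAME_HEIGHT, FRAME_WIDTH)

def get_initial_state(observation, last_observation):
    # Fill the whole stack with copies of the first processed frame.
    frame = preprocess(observation, last_observation)
    return np.tile(frame, (STATE_LENGTH, 1, 1))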