import random

import gym
import numpy as np

# Agent and FLAGS are assumed to be defined elsewhere in this project; only
# the standard-library and third-party imports used below are added here.


def agent_worker(config, policy, learner_w_queue, global_episode, n_agent,
                 log_dir, training_on, replay_queue, update_step):
    """Entry point for a single agent process: build the agent, then run
    its sampling loop against the shared queues."""
    agent = Agent(config,
                  policy,
                  global_episode=global_episode,
                  n_agent=n_agent,
                  log_dir=log_dir)
    agent.run(training_on, replay_queue, learner_w_queue, update_step)
def agent_worker(config, policy, learner_w_queue, global_episode, i, agent_type,
                 experiment_dir, training_on, replay_queue, update_step):
    """Variant of the worker above that additionally tags the agent with an
    agent_type and logs under the experiment directory."""
    agent = Agent(config,
                  policy=policy,
                  global_episode=global_episode,
                  n_agent=i,
                  agent_type=agent_type,
                  log_dir=experiment_dir)
    agent.run(training_on, replay_queue, learner_w_queue, update_step)
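# A minimal sketch of how such a worker is typically launched: one process per
# agent via torch.multiprocessing, all feeding a shared replay_queue and
# reading refreshed weights from learner_w_queue. This is an illustration, not
# code from this project: launch_agents, the queue capacities, the agent
# count, and the 'exploration' agent_type label are all assumptions.
def launch_agents(config, policy, experiment_dir, n_agents=4):
    import torch.multiprocessing as mp

    replay_queue = mp.Queue(maxsize=64)           # transitions: agents -> learner
    learner_w_queue = mp.Queue(maxsize=n_agents)  # weights: learner -> agents
    training_on = mp.Value('i', 1)                # shared run/stop flag
    update_step = mp.Value('i', 0)                # learner step counter
    global_episode = mp.Value('i', 0)             # episode counter across agents

    processes = []
    for i in range(n_agents):
        p = mp.Process(target=agent_worker,
                       args=(config, policy, learner_w_queue, global_episode,
                             i, 'exploration', experiment_dir,
                             training_on, replay_queue, update_step))
        p.start()
        processes.append(p)
    return processes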
def main():
    env = gym.make(FLAGS.env_name)
    agent = Agent(num_actions=env.action_space.n, config=FLAGS)
    if FLAGS.train:  # Train mode
        for _ in range(FLAGS.num_episodes):
            terminal = False
            observation = env.reset()
            # Take a random number of no-op steps to randomize the start state.
            for _ in range(random.randint(1, FLAGS.no_op_steps)):
                last_observation = observation
                observation, _, _, _ = env.step(0)  # Do nothing
            state = agent.get_initial_state(observation, last_observation)
            while not terminal:
                last_observation = observation
                action = agent.get_action(state)
                observation, reward, terminal, _ = env.step(action)
                # env.render()
                processed_observation = preprocess(observation, last_observation)
                state = agent.run(state, action, reward, terminal,
                                  processed_observation)
    else:  # Test mode
        # env.monitor.start(ENV_NAME + '-test')
        for _ in range(FLAGS.num_episodes_at_test):
            terminal = False
            observation = env.reset()
            for _ in range(random.randint(1, FLAGS.no_op_steps)):
                last_observation = observation
                observation, _, _, _ = env.step(0)  # Do nothing
            state = agent.get_initial_state(observation, last_observation)
            while not terminal:
                last_observation = observation
                action = agent.get_action_at_test(state)
                observation, _, terminal, _ = env.step(action)
                env.render()
                processed_observation = preprocess(observation, last_observation)
                # Slide the frame stack: drop the oldest frame, append the newest.
                state = np.append(state[1:, :, :], processed_observation, axis=0)
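# preprocess is called above but not defined in this snippet. Below is a
# minimal sketch of the standard DQN Atari preprocessing, assuming OpenCV:
# pixel-wise max over the current and previous frame (to undo sprite flicker),
# grayscale conversion, and a resize to 84x84. The (1, 84, 84) output shape is
# an assumption chosen to match the axis-0 frame stacking in the test loop;
# this project's real preprocess may differ.
def preprocess(observation, last_observation, size=84):
    import cv2  # opencv-python

    # Max over consecutive frames removes sprites that flicker frame to frame.
    frame = np.maximum(observation, last_observation)
    # Grayscale, then downscale to the standard DQN input resolution.
    gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    resized = cv2.resize(gray, (size, size), interpolation=cv2.INTER_AREA)
    # Leading axis of length 1 so frames can be stacked along axis 0.
    return resized.reshape(1, size, size).astype(np.uint8)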