    def start_threads(self):
        # max number of episodes
        max_eps = 1e6
        envs = []
        # create one local environment for each thread
        for _ in range(NUM_THREADS):
            _env = gym_super_mario_bros.make(env_name)
            _env = JoypadSpace(_env, SIMPLE_MOVEMENT)
            env = atari_wrapper.wrap_dqn(_env)
            envs.append(env)
        # create the threads and assign each its own environment and exploration schedule
        threads = []
        for i in range(NUM_THREADS):
            thread = threading.Thread(
                target=train_thread,
                daemon=True,
                args=(self, max_eps, envs[i],
                      agent.discount_rate, self.optimizer, stats,
                      AnnealingVariable(.7, 1e-20, 10000), i))
            threads.append(thread)

        # start the threads, staggering them slightly
        for t in threads:
            print("Starting worker thread")
            t.start()
            time.sleep(0.5)
        try:
            # block until every worker finishes
            for t in threads:
                t.join()
        except KeyboardInterrupt:
            print("Exiting threads!")
Example #2
                # choose an action epsilon-greedily and step the environment
                action = self.epsilon_greedy_policy(state, exploration_rate)
                next_state, reward, done, _ = env.step(action)
                episode_reward += reward
                state = next_state
            # record the finished episode and update the running statistics
            rewards_arr[episode] = episode_reward
            stats(self, episode_reward)
            print(episode_reward)
        stats.save_stats()
        return rewards_arr

    def save_weights(self):
        print("Saving Pong DQN weights")
        self.model.save_weights("PongDQNWeights.h5")

    def save_model(self):
        self.model.save("DqnPongModel.h5")

    def restore_weights(self):
        print("Restoring Pong model weights")
        # load the weights written by save_weights above
        self.model.load_weights("PongDQNWeights.h5")
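# epsilon_greedy_policy is called in the loop above but not shown in this
# excerpt. A standard implementation over the agent's Q-network could look like
# the sketch below (self.model appears in the source; self.num_actions is an
# assumed attribute holding env.action_space.n):
import numpy as np

def epsilon_greedy_policy(self, state, exploration_rate):
    # explore: with probability epsilon, take a uniformly random action
    if np.random.rand() < exploration_rate:
        return np.random.randint(self.num_actions)
    # exploit: act greedily on the Q-values predicted for this state
    q_values = self.model.predict(np.expand_dims(state, axis=0), verbose=0)
    return int(np.argmax(q_values[0]))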


stats = Stats()
# build and wrap the Pong environment
_env = gym.make('PongNoFrameskip-v4')
env = atari_wrapper.wrap_dqn(_env)
# create the agent, give it a prioritized replay buffer, and train for 50M steps
agent = DQNAgent()
agent.init_priority_replay(50000)
agent.train_model(max_steps=50e6, stats=stats)
env.close()
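# atari_wrapper.wrap_dqn is an external helper used by all of these examples.
# In most DQN codebases it applies the DeepMind-style Atari preprocessing; a
# rough equivalent using gym's built-in wrappers might look like this (the
# exact wrapper set inside atari_wrapper is an assumption, and this variant
# only suits ALE games such as Pong, not gym_super_mario_bros):
import gym

def wrap_dqn(env):
    # grayscale, resize to 84x84, and skip/max-pool over 4 frames
    env = gym.wrappers.AtariPreprocessing(env)
    # stack the last 4 frames so the state encodes motion
    env = gym.wrappers.FrameStack(env, 4)
    return env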
Example #3
                # render the game while the trained agent plays
                env.render()
                # time.sleep(0.05)  # uncomment to slow rendering to a watchable pace
                action = self.pick_action(state, exploration_rate)
                next_state, reward, done, _ = env.step(action)
                episode_reward += reward
                state = next_state
            if callable(stats):
                stats(self, episode_reward)
            rewards_arr[episode] = episode_reward
            print(episode_reward)
        if hasattr(stats, "save_stats"):
            stats.save_stats()
        return rewards_arr

    def restore_weights(self):
        self.global_network.load_weights('A3CPong.h5')
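# pick_action is also outside this excerpt. For an A3C agent it typically
# samples from the policy head instead of acting greedily; below is a sketch
# assuming self.global_network (referenced in restore_weights) maps a batched
# state to softmax action probabilities, with NUM_ACTIONS defined below:
import numpy as np

def pick_action(self, state, exploration_rate):
    # with probability epsilon, explore with a uniformly random action
    if np.random.rand() < exploration_rate:
        return np.random.randint(NUM_ACTIONS)
    # otherwise sample an action from the policy distribution
    probs = np.asarray(self.global_network.predict(state, verbose=0)).ravel()[:NUM_ACTIONS]
    return int(np.random.choice(NUM_ACTIONS, p=probs / probs.sum()))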


test_env = gym.make(env_name)
test_env = atari_wrapper.wrap_dqn(test_env)

NUM_ACTIONS = test_env.action_space.n
OBS_SPACE = test_env.observation_space.shape[0]

# reset and add a batch dimension so the state matches the network's input shape
state = test_env.reset()
state = np.expand_dims(state, axis=0)

stats = Stats()
agent = A3CAgent()
agent.play(test_env, stats)
test_env.close()