Example #1
from typing import List  # Agent, Maze and State are defined elsewhere in this project

def trial(robot: Agent) -> List[int]:
    """Run 5000 training episodes and return the step count of each one."""
    maze = Maze()
    move_history = []
    for i in range(5000):
        if i % 1000 == 0:
            print(i)  # progress indicator every 1000 episodes
        while not maze.is_complete():
            state, _ = maze.get_state_and_reward()
            action = robot.choose_action(state, maze.allowed_states[state])
            maze.update_maze(action)
            state, reward = maze.get_state_and_reward()
            robot.update_state_history(state, reward)
            if maze.steps > 1000:
                # cut runaway episodes short by teleporting the robot to (5, 5)
                maze.robot_position = State(5, 5)
        robot.learn()  # update the policy from the recorded state/reward history
        move_history.append(maze.steps)
        maze.reset()
    return move_history
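
A minimal driver sketch for trial, assuming Agent() can be constructed without arguments (hypothetical; the real constructor likely needs the maze's allowed states and learning parameters) and that numpy/matplotlib are installed:

import matplotlib.pyplot as plt
import numpy as np

robot = Agent()  # hypothetical no-argument construction
move_history = trial(robot)

# Smooth the per-episode step counts so the learning trend is visible.
window = 50
smoothed = np.convolve(move_history, np.ones(window) / window, mode="valid")
plt.semilogy(smoothed)
plt.xlabel("episode")
plt.ylabel("steps to finish (50-episode moving average)")
plt.show()
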
Example #2
import pygame  # generate_coins, generate_hero, testing_map, EpisodeSnapshot, Game, Maze, Move and agent come from the surrounding project

class Tester:
    def __init__(self):
        coins = generate_coins(testing_map.data)
        self.hero_position = generate_hero(testing_map.data)
        episode_snapshot = EpisodeSnapshot('static/map/testing.json', coins, self.hero_position)

        self.game = Game(episode_snapshot, True)
        self.env = Maze(episode_threshold=None)

    def on_coin_grabbed(self, maze_position):
        """Works as a callback when a coin is grabbed. A new one is generated based on this event.

        Args:
            maze_position (tuple): a position of the currently collected coin
        """
        coin = generate_coins(testing_map.data, count=1, grabbed_coin_position=maze_position)[0]

        self.game.append_coin(coin)
        self.env.update_reward_matrix()

    def test(self):
        """Testing process of the agent differs a bit from the training one. There isn't a explicit way of how to end the episode unless the agent steps out of the road, so coins are generated automatically in an infinite loop.
        """
        agent.load_pretrained_model()  # `agent` comes from the enclosing module

        obs = self.env.reset(testing_map.data, self.hero_position, self.on_coin_grabbed)
        done = False
        actions = []
        reward_sum = 0

        visualization_done = False

        while not visualization_done:
            if not done:
                action = agent.choose_action(obs)
                next_obs, reward, done = self.env.step(action)
                reward_sum += reward

                obs = next_obs
                actions.append(action)

            if actions:
                visualization_done, _ = self.game.play(Move(actions.pop(0)))
            else:
                visualization_done, _ = self.game.play()

            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    visualization_done = True
                    done = True

        self.game.gameOver(reward_sum)
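
A minimal usage sketch; it assumes the surrounding project does not initialize pygame elsewhere (an assumption) and that the module-level agent is already constructed:

if __name__ == "__main__":
    pygame.init()      # assumption: not already initialized by the surrounding project
    tester = Tester()  # builds the game, the environment and the starting coins
    tester.test()      # runs the pretrained agent until the window is closed
    pygame.quit()
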
Example #3
import logging

import numpy as np

logging.basicConfig(level=logging.INFO,
                    format="%(levelname)s: %(asctime)s: %(message)s",
                    datefmt="%H:%M:%S")

maze = np.array([[0, 1, 0, 0, 0, 0, 0, 0],
                 [0, 1, 0, 1, 0, 1, 0, 0],
                 [0, 0, 0, 1, 1, 0, 1, 0],
                 [0, 1, 0, 1, 0, 0, 0, 0],
                 [1, 0, 0, 1, 0, 1, 0, 0],
                 [0, 0, 0, 1, 0, 1, 1, 1],
                 [0, 1, 1, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 1, 0, 0]])  # 0 = free, 1 = occupied

game = Maze(maze)

if False:  # only show the maze
    game.render("moves")
    game.reset()

if False:  # play using a random model
    model = RandomModel(game)
    model.train()

if False:  # train using tabular Q-learning
    model = QTableModel(game, name="QTableModel")
    h, w, _, _ = model.train(discount=0.90,
                             exploration_rate=0.10,
                             learning_rate=0.10,
                             episodes=200)

if False:  # train using tabular Q-learning and an eligibility trace (aka TD-lambda)
    model = QTableTraceModel(game)
    h, w, _, _ = model.train(discount=0.90,
                             exploration_rate=0.10,
                             learning_rate=0.10,
                             episodes=200)
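
Whichever toggle is enabled, the unpacked h and w histories can be inspected afterwards; a hedged sketch, assuming they are plottable per-episode sequences (their exact contents depend on model.train's implementation):

import matplotlib.pyplot as plt

plt.plot(h)  # assumed: per-episode history (e.g. cumulative reward) returned by train()
plt.xlabel("episode")
plt.ylabel("training history (h)")
plt.show()
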
Example #4
import random  # generate_prims_maze_matrix, Maze and play_game come from the surrounding project

def test_single(model, maze_width=11):
    """Generate a random maze with Prim's algorithm and play one game from a random free cell."""
    maze_matrix = generate_prims_maze_matrix(maze_width)
    maze = Maze(maze_matrix)
    initial_cell = random.choice(maze.free_cells)
    maze.reset(initial_cell)
    return play_game(model, maze, initial_cell), maze
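
A hedged batch-evaluation sketch on top of test_single; it assumes play_game returns a truthy value when the model solves the maze, which is an assumption since its return contract is not shown here:

def test_many(model, runs=100, maze_width=11):
    """Estimate the model's success rate over freshly generated mazes."""
    wins = 0
    for _ in range(runs):
        result, _maze = test_single(model, maze_width)
        if result:  # assumption: truthy result means the maze was solved
            wins += 1
    return wins / runs

# Example: print(f"success rate: {test_many(model):.0%}")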