示例#1
0
def trial(robot: Agent) -> List[int]:
    """Run 5000 maze episodes with ``robot`` and record the steps of each.

    Within an episode the robot repeatedly picks an action for the current
    state, the maze applies it, and the resulting state and reward are
    appended to the robot's state history. Once the maze reports
    completion, ``robot.learn()`` is called, the episode's step count is
    stored, and the maze is reset.

    Args:
        robot: Agent that chooses actions and learns from its history.

    Returns:
        The value of ``maze.steps`` at the end of every episode, in order.
    """
    maze = Maze()
    steps_per_episode = []
    for episode in range(5000):
        # Progress output once every 1000 episodes.
        if not episode % 1000:
            print(episode)

        while True:
            if maze.is_complete():
                break

            current_state, _ = maze.get_state_and_reward()
            allowed_actions = maze.allowed_states[current_state]
            chosen_action = robot.choose_action(current_state, allowed_actions)
            maze.update_maze(chosen_action)

            # Record where the move led and what reward it earned.
            next_state, reward = maze.get_state_and_reward()
            robot.update_state_history(next_state, reward)

            # Step cap: past 1000 steps the robot is moved to State(5, 5),
            # presumably the goal cell so is_complete() ends the episode
            # (TODO confirm against Maze).
            if maze.steps > 1000:
                maze.robot_position = State(5, 5)

        robot.learn()
        steps_per_episode.append(maze.steps)
        maze.reset()
    return steps_per_episode
示例#2
0
import numpy as np
from environment import Maze
from agent import Agent
from constants import maze_configuration

if __name__ == '__main__':
    # Script entry point: train an Agent on the configured maze for 5000
    # episodes, collecting the number of steps each episode needed.
    maze = Maze(maze_configuration)
    robot = Agent(maze.allowed_states, alpha=0.1, exploration_factor=0.25)
    move_history = []
    robot.printRewardMap()

    for episode_index in range(5000):
        # Every 1000 episodes, show progress and the current reward map.
        if not episode_index % 1000:
            print(episode_index)
            robot.printRewardMap()

        while True:
            if maze.isGameOver():
                break

            current_state, _ = maze.getStateAndReward()
            allowed_actions = maze.allowed_states[current_state]
            chosen_action = robot.chooseAction(current_state, allowed_actions)
            maze.updateMaze(chosen_action)

            # Record the state reached and the reward received for it.
            next_state, step_reward = maze.getStateAndReward()
            robot.updateStateHistory(next_state, step_reward)

            # Step cap: past 1000 steps the robot is placed at (5, 5),
            # presumably the goal cell so the game-over check ends the
            # episode (TODO confirm against Maze).
            if maze.steps > 1000:
                maze.robot_position = (5, 5)

        robot.learn()
        move_history.append(maze.steps)
        # A fresh maze is built for the next episode.
        maze = Maze(maze_configuration)