Example #1
import matplotlib.pyplot as plt
import numpy as np

from models import *  # assumed import: Example #2 below pulls Maze and the Q models from here


def play(map_index):
    map_npy = 'mappe_test/map_' + map_index + '.npy'
    plt.grid(True)
    maze = np.load(map_npy)
    exit_cell = (30, 5)  # exit cells per map: (37, 27) for 80_1, (30, 5) for 80_2, ...
    model_name = 'NN double augm prior 8 rays +  delta location ' + map_index
    while True:

        plt.imshow(maze, cmap="binary")
        plt.plot(exit_cell[0], exit_cell[1], "gs",
                 markersize=5)  # exit is a big green square
        plt.title(map_npy)
        plt.show()
        start_cell = tuple(int(x) for x in input('start cell: ').split())
        # sample start cells: (20,28) (20,25) (14,5) (22,21) for 80_1; (38,16) for 80_2
        game = Maze(maze,
                    start_cell=start_cell,
                    exit_cell=exit_cell,
                    close_reward=-0.5)
        model = QReplayDoubleAugmPrior8(game, name=model_name, load=True)
        status, trajectory, time_elapsed = game.play(model,
                                                     start_cell=start_cell)
        game.render("moves")  # from here on, render every move
        game.play(model, start_cell=start_cell)  # replay the episode so the moves are drawn
        print('*******************************************')
        print('status = {}'.format(status))
        print('trajectory = {}'.format(trajectory))
        print('time elapsed = {} seconds'.format(time_elapsed))
        repeat = input('Type True to repeat: ')
        if repeat != "True":
            break
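
A minimal sketch of how `play` might be invoked; the map index `'80_1'` is an assumption taken from the comments above, not a documented value:

if __name__ == '__main__':
    play('80_1')  # hypothetical map index; must match a .npy file in mappe_test/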
Example #2
import logging

import numpy as np

from models import *  # provides Maze, RandomModel, QTableModel, QTableTraceModel, ...

logging.basicConfig(level=logging.INFO,
                    format="%(levelname)s: %(asctime)s: %(message)s",
                    datefmt="%H:%M:%S")

maze = np.array([
    [0, 1, 0, 0, 0, 0, 0, 0],
    [0, 1, 0, 1, 0, 1, 0, 0],
    [0, 0, 0, 1, 1, 0, 1, 0],
    [0, 1, 0, 1, 0, 0, 0, 0],
    [1, 0, 0, 1, 0, 1, 0, 0],
    [0, 0, 0, 1, 0, 1, 1, 1],
    [0, 1, 1, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 1, 0, 0],
])  # 0 = free, 1 = occupied

game = Maze(maze)

if 0:  # only show the maze
    game.render("moves")
    game.reset()

if 0:  # play using random model
    model = RandomModel(game)
    model.train()

if 0:  # train using tabular Q-learning
    model = QTableModel(game, name="QTableModel")
    h, w, _, _ = model.train(discount=0.90,
                             exploration_rate=0.10,
                             learning_rate=0.10,
                             episodes=200)

if 0:  # train using tabular Q-learning and an eligibility trace (aka TD-lambda)
    model = QTableTraceModel(game)
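
Each `if 0:` guard above is a manual switch; flipping one to `if 1:` runs that block. A minimal end-to-end sketch using the same API shown in these examples (the start cell `(0, 0)` is an assumption; it is a free cell in the maze above):

if 1:  # train tabular Q-learning, then watch the greedy policy
    model = QTableModel(game, name="QTableModel")
    h, w, _, _ = model.train(discount=0.90,
                             exploration_rate=0.10,
                             learning_rate=0.10,
                             episodes=200)
    game.render("moves")  # draw every move from now on
    game.play(model, start_cell=(0, 0))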
Example #3
    try:
        h  # force a NameError if h does not exist, so the win-rate/reward plots are skipped
        fig, (ax1, ax2) = plt.subplots(2, 1, tight_layout=True)
        fig.canvas.manager.set_window_title(model.name)  # canvas.set_window_title() was removed in matplotlib 3.6
        ax1.plot(*zip(*w))
        ax1.set_xlabel("episode")
        ax1.set_ylabel("win rate")
        ax2.plot(h)
        ax2.set_xlabel("episode")
        ax2.set_ylabel("cumulative reward")
        plt.show()
    except NameError:
        pass
    plt.grid(True)
    plt.imshow(maze, cmap="binary")
    plt.show()
    game.render("moves")
    game.play(model, start_cell=start_cell)

    #load = False
    actions_counter, close_counter, time_elapsed, lost = game.win_all_final(model)


    plt.savefig('risultato.png')

    logging.info('Mean length of path {}, time: {}, with modality {} | lost {}'.format(actions_counter, time_elapsed, model.name, lost))

    logging.info('# of close-to-obstacles states visited in all games: {} with modality {}'.format(close_counter, model.name))


    plt.show()  # must be placed here, otherwise the image disappears as soon as the program ends
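
Judging from the two log messages above, `win_all_final` seems to return the mean path length, the number of close-to-obstacle states visited across all games, the elapsed time, and the number of lost games. This is an inference from the format strings, not a documented API:

# assumed return order, inferred from the logging calls above
mean_path, close_states, elapsed, n_lost = game.win_all_final(model)
print('mean path length:', mean_path, '| elapsed:', elapsed, 's')
print('close-to-obstacle states visited:', close_states, '| games lost:', n_lost)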