def play(map_index, exit_cell=(30, 5)):
    """Interactively replay a pre-trained agent on a saved test map.

    Loads ``mappe_test/map_<map_index>.npy``, shows the map with the exit
    marked, asks for a start cell on stdin, plays one game with a loaded
    ``QReplayDoubleAugmPrior8`` model and prints the outcome. The prompt is
    repeated until the user answers anything other than ``True``.

    Args:
        map_index: Identifier of the map file. It is formatted into the file
            name and the model name, so both strings and integers work.
        exit_cell: Two-element cell passed to ``Maze`` as the exit and
            plotted as ``(x, y)``. Defaults to ``(30, 5)``. The original
            author's note lists (37, 27) for map 80_1 and (30, 5) for
            map 80_2.

    Raises:
        ValueError: If the start cell typed by the user contains a token
            that is not an integer.
    """
    map_npy = 'mappe_test/map_{}.npy'.format(map_index)
    plt.grid(True)
    maze = np.load(map_npy)
    model_name = 'NN double augm prior 8 rays + delta location {}'.format(map_index)

    while True:
        # Show the map so the user can pick a start cell.
        plt.imshow(maze, cmap="binary")
        plt.plot(exit_cell[0], exit_cell[1], "gs", markersize=5)  # exit is a big green square
        plt.title(map_npy)
        plt.show()

        # Whitespace-separated integers, e.g. "20 28".
        # Start cells noted by the original author:
        # (20,28) (20,25) (14,5) (22,21) for map 80_1, (38,16) for map 80_2.
        start_cell = tuple(int(x) for x in input('start cell: ').split())

        game = Maze(maze, start_cell=start_cell, exit_cell=exit_cell, close_reward=-0.5)
        model = QReplayDoubleAugmPrior8(game, name=model_name, load=True)

        # First run collects the statistics that are printed below.
        status, trajectory, time_elapsed = game.play(model, start_cell=start_cell)

        # Second run happens after "moves" rendering is switched on.
        # NOTE(review): presumably this is what draws the path - confirm.
        game.render("moves")
        game.play(model, start_cell=start_cell)

        print('*******************************************')
        print('status = {}'.format(status))
        print('trajectory = {}'.format(trajectory))
        print('time elapsed = {} seconds'.format(time_elapsed))

        repeat = input('Type True to repeat: ')
        if repeat != "True":
            break
from models import *

logging.basicConfig(level=logging.INFO,
                    format="%(levelname)s: %(asctime)s: %(message)s",
                    datefmt="%H:%M:%S")

# Small 8x8 demo maze.
maze = np.array([
    [0, 1, 0, 0, 0, 0, 0, 0],
    [0, 1, 0, 1, 0, 1, 0, 0],
    [0, 0, 0, 1, 1, 0, 1, 0],
    [0, 1, 0, 1, 0, 0, 0, 0],
    [1, 0, 0, 1, 0, 1, 0, 0],
    [0, 0, 0, 1, 0, 1, 1, 1],
    [0, 1, 1, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 1, 0, 0],
])  # 0 = free, 1 = occupied

game = Maze(maze)

# The `if 0:` blocks are manual switches: change 0 to 1 to enable an experiment.
# NOTE(review): with every switch at 0, `model`, `h` and `w` are never assigned
# here, and `start_cell` is not assigned anywhere in this script - confirm they
# are defined before the evaluation section at the bottom runs.

if 0:  # only show the maze
    game.render("moves")
    game.reset()

if 0:  # play using random model
    model = RandomModel(game)
    model.train()

if 0:  # train using tabular Q-learning
    model = QTableModel(game, name="QTableModel")
    h, w, _, _ = model.train(discount=0.90, exploration_rate=0.10, learning_rate=0.10, episodes=200)

if 0:  # train using tabular Q-learning and an eligibility trace (aka TD-lambda)
    model = QTableTraceModel(game)

# Plot win rate and cumulative reward, but only if a training run produced them.
try:
    h, w  # force a NameError if no training history exists
except NameError:
    pass  # nothing was trained, so there is nothing to plot
else:
    fig, (ax1, ax2) = plt.subplots(2, 1, tight_layout=True)
    # FigureCanvas.set_window_title was removed in Matplotlib 3.6;
    # the figure manager provides the same method.
    fig.canvas.manager.set_window_title(model.name)
    ax1.plot(*zip(*w))
    ax1.set_xlabel("episode")
    ax1.set_ylabel("win rate")
    ax2.plot(h)
    ax2.set_xlabel("episode")
    ax2.set_ylabel("cumulative reward")
    plt.show()

# Show the bare maze.
plt.grid(True)
plt.imshow(maze, cmap="binary")
plt.show()

# Play one game with move rendering enabled.
game.render("moves")
game.play(model, start_cell=start_cell)  # load = False

# Evaluate the model with win_all_final and log the aggregate results.
actions_counter, close_counter, time, lost = game.win_all_final(model)
plt.savefig('risultato.png')
logging.info('Mean length of path {}, time: {}, with modality {} | lost {}'.format(actions_counter, time, model.name, lost))
logging.info('# of close-to-obstacles states visited in all games: {} with modality {}'.format(close_counter, model.name))

plt.show()  # must be placed here else the image disappears immediately at the end of the program