class Tester():
    def __init__(self):
        coins = generate_coins(testing_map.data)
        self.hero_position = generate_hero(testing_map.data)
        episodeSnapshot = EpisodeSnapshot('static/map/testing.json', coins, self.hero_position)
        self.game = Game(episodeSnapshot, True)
        self.env = Maze(episode_threshold=None)

    def on_coin_grabbed(self, maze_position):
        """Callback invoked when a coin is grabbed; a new coin is generated
        in response to this event.

        Args:
            maze_position (tuple): the position of the just-collected coin
        """
        coin = generate_coins(testing_map.data, count=1, grabbed_coin_position=maze_position)[0]
        self.game.append_coin(coin)
        self.env.update_reward_matrix()

    def test(self):
        """Testing the agent differs a bit from training: there is no
        explicit way to end an episode unless the agent steps off the road,
        so coins are generated automatically in an infinite loop.
        """
        agent.load_pretrained_model()
        obs = self.env.reset(testing_map.data, self.hero_position, self.on_coin_grabbed)
        done = False
        actions = []
        reward_sum = 0
        visualization_done = False

        while not visualization_done:
            if not done:
                action = agent.choose_action(obs)
                next_obs, reward, done = self.env.step(action)
                reward_sum += reward
                obs = next_obs
                actions.append(action)

            # replay the queued actions one move per frame
            if actions:
                visualization_done, _ = self.game.play(Move(actions.pop(0)))
            else:
                visualization_done, _ = self.game.play()

            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    visualization_done = True
                    done = True

        self.game.gameOver(reward_sum)
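# A minimal sketch of how Tester is meant to be driven, assuming the
# module-level `agent` and the pygame setup used above are initialised
# elsewhere in the project.
if __name__ == '__main__':
    Tester().test()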
def play(map_index):
    map_npy = 'mappe_test/map_' + map_index + '.npy'
    plt.grid(True)
    maze = np.load(map_npy)
    exit_cell = (30, 5)  # (37, 27) for 80_1, (30, 5) for 80_2, ...
    model_name = 'NN double augm prior 8 rays + delta location ' + map_index

    while True:
        plt.imshow(maze, cmap="binary")
        plt.plot(exit_cell[0], exit_cell[1], "gs", markersize=5)  # exit is a big green square
        plt.title(map_npy)
        plt.show()

        # e.g. (20, 28), (20, 25), (14, 5), (22, 21) for 80_1; (38, 16) for 80_2
        start_cell = tuple(int(x) for x in input('start cell: ').split())

        game = Maze(maze, start_cell=start_cell, exit_cell=exit_cell, close_reward=-0.5)
        model = QReplayDoubleAugmPrior8(game, name=model_name, load=True)
        status, trajectory, time_elapsed = game.play(model, start_cell=start_cell)
        game.render("moves")
        game.play(model, start_cell=start_cell)  # play again, this time with rendering enabled

        print('*******************************************')
        print('status = {}'.format(status))
        print('trajectory = {}'.format(trajectory))
        print('time elapsed = {} seconds'.format(time_elapsed))

        repeat = input('Type True to repeat: ')
        if repeat != "True":
            break
def trial(robot: Agent) -> List[int]:
    maze = Maze()
    move_history = []
    for i in range(5000):
        if i % 1000 == 0:
            print(i)
        while not maze.is_complete():
            state, _ = maze.get_state_and_reward()
            action = robot.choose_action(state, maze.allowed_states[state])
            maze.update_maze(action)
            state, reward = maze.get_state_and_reward()
            robot.update_state_history(state, reward)
            if maze.steps > 1000:
                # give up on a runaway episode and drop the robot back to (5, 5)
                maze.robot_position = State(5, 5)
        robot.learn()
        move_history.append(maze.steps)
        maze.reset()
    return move_history
def run_simulation(alpha, watch=False):
    maze = Maze(__ACTION_SPACE)
    robot = Agent(
        __ACTION_SPACE,
        maze.allowed_states,
        alpha=alpha,
        epsilon=__EPSILON,
    )
    step_totals = []

    if watch:
        print("Starting simulation...")
        time.sleep(0.1)
    else:
        print("Beginning simulation with:")
        print(f"Robot <alpha: {robot.alpha}, epsilon: {robot.epsilon}>")
        print("Maze:")
        maze.print_maze()
        print("Starting episodes...")

    for i in range(__NUM_EPISODES):
        if i % 1000 == 0 and i > 0:
            print(f"{i} episodes completed...")
        while not maze.is_game_over():
            run_episode(maze, robot)
            if watch:
                print(f"Robot <alpha: {robot.alpha}, epsilon: {robot.epsilon}>")
                print(f"Episode {i}")
                maze.print_maze()
                print(f"Number of steps: {maze.num_steps}")
                time.sleep(0.020)
                os.system("clear")
        robot.learn()
        step_totals.append(maze.num_steps)
        # reset maze for next episode
        maze = Maze(__ACTION_SPACE)

    print("Simulation complete.")
    print("----------")
    return step_totals
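# A short usage sketch, assuming matplotlib is available and the module-level
# constants above are defined; the variable names here are illustrative, not
# part of the original script.
if __name__ == "__main__":
    import matplotlib.pyplot as plt

    totals_slow = run_simulation(alpha=0.1)
    totals_fast = run_simulation(alpha=0.99)
    plt.semilogy(totals_slow, label="alpha=0.1")
    plt.semilogy(totals_fast, label="alpha=0.99")
    plt.xlabel("episode")
    plt.ylabel("steps per episode")
    plt.legend()
    plt.show()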
def train_interactive():
    regenerate = True
    maze_matrix = None
    while regenerate:
        maze_matrix = generate_prims_maze_matrix()
        environment_maze = Maze(maze_matrix)
        show(environment_maze)
        regenerate = input("Should regenerate?") == "y"

    if input("Should Train?") == "y":
        model = build_model(maze_matrix.size)
        weights_file = input("File to load from (h5)?")
        if weights_file:
            print("loading weights from file: %s" % (weights_file,))
            model.load_weights(weights_file)
        train(model, environment_maze, epochs=1000,
              max_memory=8 * maze_matrix.size, data_size=32)
        save_model(model, input("Output filename?"))
def train_recurring():
    file_name = "super_model.h5"
    first = True
    index = 0
    path = "TestTwo/"
    name = "TT"
    while True:
        sample_maze = generate_prims_maze_matrix(11, 11)
        environment_maze = Maze(sample_maze)
        show(environment_maze, file_name="%s%s%d" % (path, name, index))
        model = build_model(sample_maze.size)
        # note: weights are loaded from file_name but saved under name
        if not first:
            model.load_weights(file_name)
        first = False
        train(model, environment_maze, epochs=1000,
              max_memory=8 * sample_maze.size, data_size=32)
        save_model(model, name)
        index += 1
def test_sarsa_lambda_with_epsilon_greedy():
    env = Maze()
    agent = SarsaLambdaAgentWithEpsilonGreedy(act_n=4)
    rs = sarsa_demo(env, agent, 2000)
    plt.plot(range(2000), rs)
    plt.grid()
    plt.show()
def test_sarsa_lambda_with_ucb1():
    env = Maze()
    agent = SarsaLambdaAgentWithUCB1(act_n=4)
    rs = sarsa_demo(env, agent, 2000)
    plt.plot(range(2000), rs)
    plt.grid()
    plt.show()
def test_sarsa():
    env = Maze()
    agent = SarsaAgent(act_n=4)
    rs = sarsa_demo(env, agent, 2000)
    plt.plot(range(2000), rs)
    plt.grid()
    plt.show()
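# `sarsa_demo` itself is defined elsewhere in the project; as a rough sketch,
# assuming a Gym-style Maze (reset/step returning a 3-tuple) and an agent
# exposing choose_action and learn, the loop these tests rely on looks
# something like this (names and signatures here are assumptions):
def sarsa_demo_sketch(env, agent, episodes):
    rewards = []
    for _ in range(episodes):
        obs = env.reset()
        action = agent.choose_action(obs)
        done, total = False, 0.0
        while not done:
            next_obs, reward, done = env.step(action)
            next_action = agent.choose_action(next_obs)
            # on-policy update: bootstrap from the action actually taken next
            agent.learn(obs, action, reward, next_obs, next_action, done)
            obs, action = next_obs, next_action
            total += reward
        rewards.append(total)
    return rewards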
navigation_map = "" line = "" for column in range(len(maze.content)): for row in range(len(maze.content[column])): if maze.content[row, column] == wall: line += ICONS["wall"] + " " else: # Determine the direction of the arrow state = (row, column) valid_next_states = maze.get_valid_next_states(state) best_next_state = get_best_next_state(Q, state, valid_next_states) direction = get_direction(state, best_next_state) line += ICONS[direction] + " " navigation_map += line + "\n" line = "" return navigation_map if __name__ == "__main__": input_size = int(input("Size? ")) size = input_size if input_size else 11 maze = Maze(generate_prims_maze_matrix(size, size)) print(maze) model = build_model(maze.width, maze.height) train_for_random_start(model, maze) # test_all(model, maze) print(get_navigation_map(model, maze))
    ['%', '%', '%', '%', '%', '%', '%', '%', '%', '%'],
    ['%', '%', '%', '%', '%', '%', '%', '%', '%', '%']
]))

HIDDEN_SIZE = 256
BATCH_SIZE = 20
GAMMA = 0.98
PERCENTILE = 25
SEED = 1234
LEARNING_RATE = 0.003
EPISODES_THRESHOLD = 200
DESIRED_REWARD = 3.75
MAP_ITERATIONS = 256

env = Maze()
layer_sizes = [env.observations_count, HIDDEN_SIZE, env.actions_count]
agent = Agent(layer_sizes, SEED, LEARNING_RATE)

Episode = namedtuple('Episode', field_names=['reward', 'steps'])
EpisodeStep = namedtuple('EpisodeStep', field_names=['observation', 'action'])


def iterate_batches(maze, hero_position, epsilon):
    """Iterates through individual episodes during the training process and
    returns them in batches.

    Args:
        maze (matrix): NumPy matrix representing the map (environment)
        hero_position (tuple): the hero's current position on the map
        epsilon (float): scalar used for epsilon-greedy exploration

    Yields:
def test_monte_carlo():
    env = Maze()
    agent = MonteCarloAgent(act_n=4)
    rs = monte_carlo_demo(env, agent, 2000)
    plt.plot(range(2000), rs)
    plt.grid()
    plt.show()
import numpy as np
from environment import Maze
from agent import Agent
import matplotlib.pyplot as plt

if __name__ == '__main__':
    maze = Maze()
    robot = Agent(maze, alpha=0.1, randomFactor=0.25)
    moveHistory = []
    for i in range(5000):
        if i % 1000 == 0:
            print(i)
        while not maze.isGameOver():
            state, _ = maze.getStateAndReward()
            action = robot.chooseAction(state, maze.allowedStates[state])
            maze.updateMaze(action)
            state, reward = maze.getStateAndReward()
            robot.updateStateHistory(state, reward)
            if maze.steps > 1000:
                maze.robotPosition = (5, 5)
        robot.learn()
        moveHistory.append(maze.steps)
        maze = Maze()

    # second experiment: same setup with a much higher learning rate
    maze = Maze()
    robot = Agent(maze, alpha=0.99, randomFactor=0.25)
    moveHistory2 = []
    for i in range(5000):
        if i % 1000 == 0:
            print(i)
        while not maze.isGameOver():
            # swap observation
            observation = observation_

            # break the while loop at the end of this episode
            if done:
                break
            step += 1

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    # maze game
    env = Maze()
    # RL = DeepQNetwork(env.n_actions, env.n_features,
    #                   learning_rate=0.01,
    #                   reward_decay=0.9,
    #                   e_greedy=0.9,
    #                   replace_target_iter=200,
    #                   memory_size=2000
    #                   )
    # hand-tuned parameters, best version so far
    RL = DeepQNetwork(env.n_actions, env.n_features,
                      learning_rate=0.005,
                      reward_decay=0.8,
                      e_greedy=0.8,
patient_number = '0866'
map_number = str(88)
map_index = str(1)
map_name = 'mappe_test/map_' + patient_number + '_' + map_number + '_' + map_index
maze = np.load(map_name + '.npy')
plotmap(map_name + '.npy')
load = False

if __name__ == '__main__':
    start_cell = tuple(int(x) for x in input('start cell: ').split())
    exit_cell = tuple(int(x) for x in input('exit cell: ').split())
    game = Maze(maze, start_cell=start_cell, exit_cell=exit_cell, close_reward=-0.5)

    if 0:  # train using a neural network with experience replay (also saves the resulting model)
        model = QReplayDoubleAugmPrior4(game, name="NN double augm prior 4 rays")
        h, w, _, _ = model.train(discount=0.80, exploration_rate=0.60,
                                 episodes=700, max_memory=maze.size * 4)

    if 1:  # train using a neural network with experience replay (also saves the resulting model)
        model_name = "NN double augm prior 8 rays + delta location " + patient_number + '_' + map_number + '_' + map_index
        model = QReplayDoubleAugmPrior8(game, name=model_name, load=False)
        h, w, _, _ = model.train(discount=0.80, exploration_rate=0.60,
                                 episodes=1600, max_memory=maze.size * 4)

    if 0:  # train using a neural network with experience replay (also saves the resulting model)
        model = QReplayDoubleAugmPrior3(game, name="NN double augm prior 3 rays + delta location")
        h, w, _, _ = model.train(discount=0.80, exploration_rate=0.60,
                                 episodes=300, max_memory=maze.size * 4)

    if 0:  # train using a neural network with experience replay (also saves the resulting model)
            movements.append(n_movements)
            print('n_movements: {0}'.format(n_movements))
            break

    print('Game is finished!')
    print(agent.q_table)
    plot_rewards_movements()
    return


def plot_rewards_movements():
    plt.figure()
    plt.plot(list(range(EPISODE_COUNT)), movements)
    plt.xlabel('Episode')
    plt.ylabel('#Movements')
    plt.show()
    return


if __name__ == '__main__':
    environment = Maze(action_space=ACTION_SPACE, pit_reward=PIT_REWARD,
                       destination_reward=DESTINATION_REWARD, wall_reward=WALL_REWARD)
    agent = Agent(n_states=environment.width * environment.height,
                  action_space=ACTION_SPACE, alpha=ALPHA, gamma=GAMMA, epsilon=EPSILON)
    environment.window.after(10, run_experiment)
    environment.window.mainloop()
import logging

import matplotlib.pyplot as plt
import numpy as np

from environment import Maze
from models import *

logging.basicConfig(level=logging.INFO,
                    format="%(levelname)s: %(asctime)s: %(message)s",
                    datefmt="%H:%M:%S")

maze = np.array([
    [0, 1, 0, 0, 0, 0, 0, 0],
    [0, 1, 0, 1, 0, 1, 0, 0],
    [0, 0, 0, 1, 1, 0, 1, 0],
    [0, 1, 0, 1, 0, 0, 0, 0],
    [1, 0, 0, 1, 0, 1, 0, 0],
    [0, 0, 0, 1, 0, 1, 1, 1],
    [0, 1, 1, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 1, 0, 0]
])  # 0 = free, 1 = occupied

game = Maze(maze)

if 0:  # only show the maze
    game.display = True
    game.reset()

if 0:  # play using the random model
    model = RandomModel(game)
    model.train()

if 0:  # train using tabular Q-learning
    model = QTableModel(game)
    h, _, _ = model.train(discount=0.90, exploration_rate=0.10,
                          learning_rate=0.10, episodes=10000)
def update():
    for episode in range(100):
        # initial observation
        observation = env.reset()

        while True:
            env.render()

            # RL chooses an action based on the observation
            action = RL.choose_action(str(observation))

            # RL takes the action and gets the next observation and reward
            observation_, reward, done = env.step(action)

            # RL learns from this transition
            RL.learn(str(observation), action, reward, str(observation_))

            # swap observation
            observation = observation_

            if done:
                break

    # end of navigation
    print('Reinforcement learning finished successfully')
    env.destroy()


if __name__ == "__main__":
    env = Maze()
    RL = QLearningTable(actions=list(range(env.n_actions)))
    env.after(100, update)
    env.mainloop()
def test_single(model, maze_width=11):
    maze_matrix = generate_prims_maze_matrix(maze_width)
    maze = Maze(maze_matrix)
    initial_cell = random.choice(maze.free_cells)
    maze.reset(initial_cell)
    return play_game(model, maze, initial_cell), maze
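# A hypothetical call site for test_single; `won` assumes play_game returns a
# truthy success flag, which may differ in the actual project.
won, solved_maze = test_single(model, maze_width=11)
print("solved" if won else "failed")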
import numpy as np
from environment import Maze
from agent import Agent
from constants import maze_configuration

if __name__ == '__main__':
    maze = Maze(maze_configuration)
    robot = Agent(maze.allowed_states, alpha=0.1, exploration_factor=0.25)
    move_history = []
    robot.printRewardMap()
    for episode in range(5000):
        if episode % 1000 == 0:
            print(episode)
            robot.printRewardMap()
        while not maze.isGameOver():
            state, _ = maze.getStateAndReward()
            action = robot.chooseAction(state, maze.allowed_states[state])
            maze.updateMaze(action)
            state, reward = maze.getStateAndReward()
            robot.updateStateHistory(state, reward)
            if maze.steps > 1000:
                maze.robot_position = (5, 5)
        robot.learn()
        move_history.append(maze.steps)
        maze = Maze(maze_configuration)
def test_q_learn():
    env = Maze()
    agent = QLearnAgent(act_n=4)
    rs = q_learn_demo(env, agent, 2000)
    plt.plot(range(2000), rs)
    plt.grid()
    plt.show()
import logging

import matplotlib.pyplot as plt
import numpy as np

from environment import Maze
from models import *

logging.basicConfig(level=logging.INFO,
                    format="%(levelname)s: %(asctime)s: %(message)s",
                    datefmt="%H:%M:%S")

maze = np.array([
    [0, 1, 0, 0, 0, 0, 0, 0],
    [0, 1, 0, 1, 0, 1, 0, 0],
    [0, 0, 0, 1, 1, 0, 1, 0],
    [0, 1, 0, 1, 0, 0, 0, 0],
    [1, 0, 0, 1, 0, 1, 0, 0],
    [0, 0, 0, 1, 0, 1, 1, 1],
    [0, 1, 1, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 1, 0, 0]
])  # 0 = free, 1 = occupied

game = Maze(maze)

if 0:  # only show the maze
    game.render("moves")
    game.reset()

if 0:  # play using the random model
    model = RandomModel(game)
    model.train()

if 0:  # train using tabular Q-learning
    model = QTableModel(game, name="QTableModel")
    h, w, _, _ = model.train(discount=0.90, exploration_rate=0.10,
                             learning_rate=0.10, episodes=200)
        while not maze.is_complete():
            state, _ = maze.get_state_and_reward()
            action = robot.choose_action(state, maze.allowed_states[state])
            maze.update_maze(action)
            state, reward = maze.get_state_and_reward()
            robot.update_state_history(state, reward)
            if maze.steps > 1000:
                maze.robot_position = State(5, 5)
        robot.learn()
        move_history.append(maze.steps)
        maze.reset()
    return move_history


if __name__ == '__main__':
    allowed_states = Maze().allowed_states
    robot_1 = Agent(allowed_states, alpha=0.1, random_factor=0.25)
    move_history_1 = trial(robot_1)
    robot_2 = Agent(allowed_states, alpha=0.99, random_factor=0.25)
    move_history_2 = trial(robot_2)

    plt.subplot(211)
    plt.semilogy(move_history_1, 'b-')
    plt.xlabel('episode')
    plt.ylabel('steps to solution')
    plt.legend(['alpha=0.1'])
    plt.subplot(212)
    plt.semilogy(move_history_2, 'r-')
    plt.xlabel('episode')