def experiment(test_game, num_experiments):
    """Run Value Iteration experiments and report the average episode length.

    For each of ``num_experiments`` runs a fresh policy is learned with
    ValueIteration and then executed ``POLICY_EVAL_RUNS`` times; the mean
    number of steps per episode for that policy is recorded.  The mean of
    those per-policy averages is printed and returned.

    Args:
        test_game: game/environment instance handed to ValueIteration.
        num_experiments: number of independent learn-then-evaluate runs.

    Returns:
        float: average number of steps to reach the goal, averaged over
        all experiments.
    """
    POLICY_EVAL_RUNS = 100  # episodes executed per learned policy

    average_number_of_moves_with_policy = []
    for _ in range(num_experiments):
        # Learn a policy from scratch for this experiment.
        vi = ValueIteration(test_game)
        # value_iteration() returns (policy, num_iterations); only the
        # policy is needed here.
        policy = vi.value_iteration()[0]
        print(policy)
        # Evaluate the learned policy and record its mean episode length.
        avg_num_steps = sum(
            vi.execute_policy(policy) for _ in range(POLICY_EVAL_RUNS)
        ) / float(POLICY_EVAL_RUNS)
        average_number_of_moves_with_policy.append(avg_num_steps)

    total_average_num_steps = (
        sum(average_number_of_moves_with_policy) / num_experiments
    )
    print("Total Average Number of Steps: {}".format(total_average_num_steps))
    return total_average_num_steps
def main():
    """Read queries from results.txt, answer each with MDP/RL, and show boards.

    Each line of results.txt is a comma-separated query:
    column, row, <param>, method ("MDP" or "RL"), query type
    ("stateValue" | "bestPolicy" | "bestQValue").
    """
    # Read in the results file; the context manager closes the handle
    # (the original leaked it).
    with open("results.txt", "r") as results:
        questions = []
        for line in results:
            line = line.replace("\n", "")
            line = line.replace("\r", "")
            questions.append(line.split(","))

    windows = []
    # Do the query for each line in the file.  enumerate() replaces the
    # original questions.index(q), which returned the wrong question
    # number whenever two query lines were identical.
    for question_number, q in enumerate(questions, start=1):
        window = tk.Tk()
        grid = Grid('gridConf.txt')

        # Solve the grid with the requested method.
        if q[3] == "MDP":
            valueIteration = ValueIteration(grid)
            grid = valueIteration.runValueIteration()
        elif q[3] == "RL":
            qValueLearning = QValueLearning(grid)
            grid = qValueLearning.runQValueLearning()

        gridPolicies = grid.get_policies_()
        terminal_states = grid.terminal
        boulder_states = grid.boulder

        # q[0]/q[1] index the queried cell (column, row).
        answer = ""
        if q[4] == "stateValue":
            answer = grid.gridStates[int(q[1])][int(q[0])].get_max()
        elif q[4] == "bestPolicy":
            answer = grid.gridStates[int(q[1])][int(q[0])].getPolicy(0.0)[1]
        elif q[4] == "bestQValue" and q[3] == "RL":
            answer = grid.gridStates[int(q[1])][int(q[0])].getPolicy(0.0)[0]

        answer = "Question " + str(question_number) + ": " + ",".join(q) + ": " + str(answer)

        # The two draw_board calls differed only in the mode label;
        # collapse the duplication.
        if q[3] == "MDP" or q[3] == "RL":
            mode = 'value-iteration' if q[3] == "MDP" else 'q-learning'
            draw_board(window, gridPolicies,
                       [row[:-1] for row in terminal_states],
                       boulder_states,
                       max_reward(terminal_states),
                       max_punishment(terminal_states),
                       q[2], mode, answer)
        windows.append(window)

    # Display all queries.
    for window in windows:
        window.mainloop()
from GridWorld import GridWorld
from GridWorld import GridWorldAdditive
from ValueIteration import ValueIteration


def _solve_and_print(env, vi, gamma, *extra):
    """Run value iteration on *env*, then print values, Q-values and policy.

    *extra* is forwarded to valueIteration (e.g. an iteration cap for the
    undiscounted additive-reward case).
    """
    values = vi.valueIteration(env, gamma, *extra)
    env.printValues(values)
    qvalues = vi.getQValues(env, values, gamma)
    env.printQValues(qvalues)
    policy = vi.getPolicy(env, values, gamma)
    env.printPolicy(policy)


# Run Value Iteration in different Grid World environments
if __name__ == "__main__":
    gamma = 0.9
    print("Grid world Value Iteration with discounted rewards gamma = %.2f\n" % gamma)
    # Terminal cells: +1 at (0, 3) and -1 at (1, 3); (1, 1) is a wall.
    terminals = {(0, 3): +1, (1, 3): -1}
    gw = GridWorld((3, 4), 0.8, [(1, 1)], terminals)
    vi = ValueIteration()
    _solve_and_print(gw, vi, gamma)

    reward = -0.01
    print("Grid world Value Iteration with additive rewards = %.2f\n" % reward)
    gwa = GridWorldAdditive((3, 4), 0.8, [(1, 1)], terminals, reward)
    # Undiscounted (gamma = 1) with a 100-iteration cap.
    _solve_and_print(gwa, vi, 1, 100)

    reward = -0.04
    print("Grid World with additive rewards = %.2f\n" % reward)
    # NOTE(review): the original ends here without solving this last
    # environment -- the chunk may be truncated; behavior preserved.
    gwa = GridWorldAdditive((3, 4), 0.8, [(1, 1)], terminals, reward)
# NOTE(review): fragment of a larger generation script -- `outer`, `info`,
# `configurations`, `args`, `library`, `tqdm`, `MDP`, `SpriteWorld` and
# `ValueIteration` are defined by enclosing code not visible here.
# Python 2 print statement: this file targets Python 2.
print 'Generating map', outer, '(', configurations, 'configuations )'
sys.stdout.flush()

world = info['map']
rewards = info['rewards']
terminal = info['terminal']
instructions = info['instructions']
values = []

# Render the sprite view of this map once per outer index.
sprite = SpriteWorld(library.objects, library.background)
sprite.makeGrid(world, args.vis_path + str(outer) + '_sprites')

# Solve every reward/terminal configuration of this map with value iteration.
for inner in tqdm(range(configurations)):
    reward_map = rewards[inner]
    terminal_map = terminal[inner]
    instr = instructions[inner]

    mdp = MDP(world, reward_map, terminal_map)
    vi = ValueIteration(mdp)
    values_list, policy = vi.iterate()
    # presumably maps the flat value list back onto the grid layout --
    # confirm against MDP.representValues
    value_map = mdp.representValues(values_list)
    values.append(value_map)
    # visualize_values(mdp, values_list, policy, args.vis_path + str(outer) + '_' + str(inner) + '_values', title=instr)

# Persist the solved values alongside the original map info as <outer>.p.
info['values'] = values
filename = os.path.join(args.save_path, str(outer) + '.p')
pickle.dump(info, open(filename, 'wb'))
        # NOTE(review): fragment -- the enclosing method/class header above
        # and the rest of the game loop below lie outside this view.
        self.display_mode_on = False

    def turn_on_display(self):
        # Re-enable rendering.
        self.display_mode_on = True


# --- script: play Mastermind with a policy from value iteration ---
code_to_decode = "RBGY"
clock = pygame.time.Clock()
mastermind = Mastermind(code_to_decode)
mastermind.reset()

# Value Iteration
done = False
# gamma = 0.85, convergence threshold = 1e-17
optimal_policy, optimal_value = ValueIteration(mastermind, 0.85, 0.00000000000000001)
# step() returns a tuple whose first element is the new state.
state = mastermind.step("YGBR")[0]

while not done:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            done = True
    ''' try according to the policy '''
    # Split the policy's action -> probability mapping into parallel
    # lists (presumably for weighted sampling further down -- not visible).
    actions = []
    probs = []
    for action, prob in optimal_policy[state].items():
        actions.append(action)
        probs.append(prob)
def ValueIteration_Rtest():
    """Solve the "R" configuration by value iteration and print 10 trial runs."""
    epsilon = 0.00000000001  # convergence threshold (1e-11)
    gamma = 0.5              # discount factor
    VI = ValueIteration(epsilon, gamma, "R")
    VI.valueIteration(epsilon, gamma)
    for _ in range(10):
        print(VI.trial_run())
def VI_R_reset():
    """Like ValueIteration_Rtest, but with restart=True: solve "R" and print 10 trials."""
    epsilon = 0.00000000001  # convergence threshold (1e-11)
    gamma = 0.5              # discount factor
    VI = ValueIteration(epsilon, gamma, "R", restart=True)
    VI.valueIteration(epsilon, gamma)
    for _ in range(10):
        print(VI.trial_run())
GAMMA = 0.9  # discount factor for value iteration

if __name__ == "__main__":
    # Command line parser
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "input_file",
        help="The name of the file to treat as the search space")
    parser.add_argument("--epsilon",
                        help="epsilon for value iteration",
                        type=float,
                        default=0.5)
    args = parser.parse_args()

    # Create our graph structure to traverse.  The context manager closes
    # the input file; the original opened it and never closed it.
    g = Graph(WIDTH, HEIGHT)
    with open(args.input_file, 'r') as f:
        create_graph_from_file(f, g)

    # Run value iteration from the top-left to the bottom-right corner.
    v = ValueIteration(g, (0, 0), (WIDTH - 1, HEIGHT - 1), GAMMA, args.epsilon)
    util = v.run()
    v.set_utils(util)
    path = v.trace_path()
    # Print path to see verbose information about each node on the path
    #print path
def getQValues(self, env):
    """Return the Q-values of *env*, derived from a fresh value-iteration solve."""
    solver = ValueIteration()
    state_values = solver.valueIteration(env)
    return solver.getQValues(env, state_values)