def main(algorithm, track, x_start, y_start, discount, learning_rate, threshold, max_iterations, epsilon=None, reset_on_crash=False):
    """
    Program entry. Runs selected algorithm on selected track, at given
    coordinates, with given parameters.

    :param algorithm: String, one of 'value_iteration', 'q_learning', 'sarsa';
        anything else prints a message and returns without running
    :param track: String, path to the track definition file (first line is
        "<rows>,<cols>", remaining lines are the grid layout)
    :param x_start: Int, starting x coordinate
    :param y_start: Int, starting y coordinate
    :param discount: Float, discount factor
    :param learning_rate: Float (used by q_learning and sarsa)
    :param threshold: Float, convergence threshold
    :param max_iterations: Int, iteration cap
    :param epsilon: Float, exploration rate passed to the agent (optional)
    :param reset_on_crash: Boolean, whether a crash resets to the start state
    :return: None
    """
    # Parse the track file: header line holds the dimensions, the rest is
    # the grid layout, one row per line.
    with open(track) as f:
        specs = f.readline().strip().split(',')
        rows, cols = int(specs[0]), int(specs[1])
        layout = f.read().splitlines()

    # State is (x, y, x_velocity, y_velocity); the car starts at rest.
    initial_state = (x_start, y_start, 0, 0)
    initial_action = (0, 0)
    agent = Car(initial_action, epsilon)
    environment = RaceTrack(rows, cols, layout, initial_state, reset_on_crash=reset_on_crash)

    if algorithm == 'value_iteration':
        value_iterator = ValueIteration(discount, threshold, max_iterations, environment, agent)
        value_iterator.run()
        # Value iteration computes values first; the path is read off afterwards.
        path = value_iterator.extract_policy(initial_state)
        value_iterator.plot_max_diffs()
    elif algorithm == 'q_learning':
        q_learner = QLearning(discount, learning_rate, threshold, max_iterations, environment, agent)
        path = q_learner.run()
        q_learner.plot_avg_cost()
    elif algorithm == 'sarsa':
        sarsa = Sarsa(discount, learning_rate, threshold, max_iterations, environment, agent)
        path = sarsa.run()
        sarsa.plot_avg_cost()
    else:
        print("No algorithm selected")
        return None

    draw_track(path, layout)
def main(args):
    """
    Run value iteration and policy iteration on a grid-world map and plot both
    resulting policies/value functions.

    :param args: parsed CLI namespace; reads ``args.world`` (optional path to
        the world map file) and ``args.gamma`` (discount factor)
    :raises FileNotFoundError: if the world map file does not exist
    :return: None
    """
    # resolve path to world map definition; default to world_map.txt next to
    # this script when no explicit path was given
    if not args.world:
        world_map_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), 'world_map.txt')
    else:
        world_map_path = args.world

    print("Reading world from %s" % world_map_path)
    if not os.path.exists(world_map_path):
        # FileNotFoundError is a subclass of IOError/OSError, so existing
        # callers catching IOError still work.
        raise FileNotFoundError(
            "World map definition not found at its expected path: %s" % world_map_path)

    world = World(world_map_path)
    visualizer = Visualizer(world)

    # Value Iteration
    # NOTE(review): 10e-10 equals 1e-9 — confirm this wasn't meant to be 1e-10.
    value_iteration = ValueIteration(world, one_step_cost_v1, discount_factor=args.gamma, eps=10e-10)
    value_iteration.execute()
    optimal_policy = value_iteration.extract_policy()
    fig_vi = plt.figure()
    visualizer.draw(fig_vi, optimal_policy, value_iteration.value_fn,
                    "Value Iteration (gamma = %.2f)" % args.gamma)

    # Policy Iteration
    policy_iteration = PolicyIteration(world, one_step_cost_v1, discount_factor=args.gamma)
    value_fn = policy_iteration.execute()
    fig_pi = plt.figure()
    visualizer.draw(fig_pi, policy_iteration.policy, value_fn,
                    "Policy Iteration (gamma = %.2f)" % args.gamma)

    plt.show()