Exemplo n.º 1
0
def main(algorithm, track, x_start, y_start, discount, learning_rate, threshold, max_iterations, epsilon=None, reset_on_crash=False):
    """
    Program entry. Runs the selected algorithm on the selected track, starting at the given
    coordinates, with the given parameters, then draws the resulting path on the track layout.

    The first line of the track file is parsed as ``rows,cols``; every remaining line is kept
    as one row of the layout.

    :param algorithm: String, one of 'value_iteration', 'q_learning' or 'sarsa'
    :param track: String, path of the track file to open
    :param x_start: Int, first component of the initial state
    :param y_start: Int, second component of the initial state
    :param discount: Float, forwarded to the selected algorithm
    :param learning_rate: Float, forwarded to QLearning / Sarsa (not used by ValueIteration)
    :param threshold: Float, forwarded to the selected algorithm
    :param max_iterations: Int, forwarded to the selected algorithm
    :param epsilon: Float, forwarded to the Car agent
    :param reset_on_crash: Boolean, forwarded to the RaceTrack environment
    :return: None
    """
    # Only the parsing needs the file, so the handle is released before the
    # algorithm run and the plotting start.
    with open(track) as f:
        specs = f.readline().strip().split(',')
        rows = int(specs[0])
        cols = int(specs[1])
        layout = f.read().splitlines()

    # State is (x, y, 0, 0); the two trailing zeros are presumably the initial
    # velocity components -- confirm against RaceTrack.
    initial_state = (x_start, y_start, 0, 0)
    initial_action = (0, 0)

    agent = Car(initial_action, epsilon)
    environment = RaceTrack(rows, cols, layout, initial_state, reset_on_crash=reset_on_crash)

    if algorithm == 'value_iteration':
        value_iterator = ValueIteration(discount, threshold, max_iterations, environment, agent)
        value_iterator.run()
        # Value iteration yields the path in a separate step after run().
        path = value_iterator.extract_policy(initial_state)
        value_iterator.plot_max_diffs()
    elif algorithm == 'q_learning':
        q_learner = QLearning(discount, learning_rate, threshold, max_iterations, environment, agent)
        path = q_learner.run()
        q_learner.plot_avg_cost()
    elif algorithm == 'sarsa':
        sarsa = Sarsa(discount, learning_rate, threshold, max_iterations, environment, agent)
        path = sarsa.run()
        sarsa.plot_avg_cost()
    else:
        print("No algorithm selected")
        return None
    draw_track(path, layout)
Exemplo n.º 2
0
import numpy as np

import helper
from sarsa import Sarsa
from qlearning import QLearning
from sarsa_expected import SarsaExpected

# File to run in order to generate all the plots sequentially
if __name__ == '__main__':
    # Shared x-axis for every plot: 0, 100, ..., 9900 (100 points).
    data_X = np.arange(start=0, stop=10000, step=100)

    # Number of seeded runs averaged per agent. The same value drives both the
    # loop bound and the divisor so the plotted curve is the per-seed mean.
    num_seeds = 50

    # Agent 1: Sarsa(0)
    # Accumulator for the summed results of all seeds.
    # Assumes Sarsa.run() returns an array broadcastable to (100, 1) -- TODO confirm.
    data_Y1 = np.zeros((100, 1))
    for seed in range(num_seeds):
        print(f'Seed: {seed}')
        sarsa = Sarsa(seed=seed, num_actions=4, alpha=0.1)
        y = sarsa.run()
        data_Y1 += y
    data_Y1 /= num_seeds
    helper.plotSingle(data_X, data_Y1, "Sarsa(0)")

    # Sarsa(0) with King's move
    # Same experiment with num_actions=8 instead of 4.
    data_Y2 = np.zeros((100, 1))
    for seed in range(num_seeds):
        print(f'Seed: {seed}')
        sarsa = Sarsa(seed=seed, num_actions=8, alpha=0.1)
        y = sarsa.run()
        data_Y2 += y
    data_Y2 /= num_seeds
    helper.plotSingle(data_X, data_Y2, "Sarsa(0) with King's move")

    # Sarsa(0) with stochastic wind