Exemplo n.º 1
0
from rltoolbox.algorithm.cmac import *
from rltoolbox.environment.continuous import BallBeam
from rltoolbox.approximator import CMACApproximator
from rltoolbox.misc import compare_learning_curves, plot_learning_stats

if __name__ == "__main__":
    # Imported explicitly so the script does not depend on the wildcard
    # import from ``rltoolbox.algorithm.cmac`` happening to expose ``np``.
    import numpy as np

    # Experiment setup: CMACSARSA on BallBeam, with the environment told to
    # use CMACApproximator configured with n_layers=4.
    algorithm = CMACSARSA
    environment = BallBeam(max_steps=10000)
    environment.approximate_with(CMACApproximator, n_layers=4)
    n_episodes = 10
    n_repeats = 5

    # Keyword arguments forwarded unchanged to the algorithm constructor.
    book_parameters = {
        'alpha': 1,
        'gamma': 0.995,
        'epsilon': 0.1,
        'lambd': 0.0,
    }

    # One row per repeat, one column per episode.
    learning_curves = np.zeros((n_repeats, n_episodes))
    for i in range(n_repeats):
        # A fresh algorithm instance is created for every repeat; the same
        # environment object is reused across repeats.
        alg_instance = algorithm(environment, **book_parameters)
        alg_instance.learn(n_episodes, render=False)
        learning_curves[i, :] = alg_instance.steps_per_episode

    plot_learning_stats(learning_curves,
                        'BallBeam CMACSARSA learning',
                        savefig=False)
Exemplo n.º 2
0
#!/usr/bin/env python
import numpy as np

from rltoolbox.algorithm.fuzzy import *
from rltoolbox.environment.continuous import BallBeam
from rltoolbox.approximator import FuzzyApproximator
from rltoolbox.misc import compare_learning_curves, plot_learning_stats


if __name__ == "__main__":
    # Experiment configuration: FR algorithm, 5 repeats of 5 episodes each.
    n_repeats = 5
    n_episodes = 5
    algorithm = FR
    # Keyword arguments handed to the algorithm constructor on every repeat.
    book_parameters = dict(alpha=0.1, beta=0.01, epsilon=0.1, lambd=0.5)

    # BallBeam environment, told to use FuzzyApproximator.
    environment = BallBeam(max_steps=10000)
    environment.approximate_with(FuzzyApproximator)

    # Row = repeat, column = episode.
    learning_curves = np.zeros((n_repeats, n_episodes))
    for run in range(n_repeats):
        # New algorithm instance per repeat; the environment is shared.
        learner = algorithm(environment, **book_parameters)
        learner.learn(n_episodes, render=False)
        learning_curves[run] = learner.steps_per_episode

    plot_learning_stats(learning_curves, 'BallBeam FR learning')
Exemplo n.º 3
0
#!/usr/bin/env python
import numpy as np

from rltoolbox.algorithm.fuzzy import *
from rltoolbox.environment.continuous import BallBeam
from rltoolbox.approximator import FuzzyApproximator
from rltoolbox.misc import compare_learning_curves, plot_learning_stats

if __name__ == "__main__":
    # FQ algorithm run on BallBeam: 5 repeats, 5 episodes per repeat.
    algorithm = FQ
    n_episodes, n_repeats = 5, 5

    # The environment is created once and configured with FuzzyApproximator.
    environment = BallBeam(max_steps=10000)
    environment.approximate_with(FuzzyApproximator)

    # Passed as keyword arguments to each algorithm instance.
    book_parameters = {'alpha': 0.1, 'epsilon': 0.1,
                       'gamma': 0.995, 'lambd': 0.5}

    # Each repeat fills one row with that run's steps_per_episode values.
    learning_curves = np.zeros((n_repeats, n_episodes))
    for repeat_idx in range(n_repeats):
        agent = algorithm(environment, **book_parameters)
        agent.learn(n_episodes, render=False)
        learning_curves[repeat_idx, :] = agent.steps_per_episode

    plot_learning_stats(
        learning_curves,
        'BallBeam FQ learning',
        savefig=False,
    )
Exemplo n.º 4
0
#!/usr/bin/env python
from rltoolbox.algorithm.classic import Q
from rltoolbox.environment.continuous import BallBeam
from rltoolbox.approximator import TableApproximator
from rltoolbox.misc import plot_learning_stats

# Create the BallBeam environment and tell it to use TableApproximator.
environment = BallBeam(max_steps=10000)
environment.approximate_with(TableApproximator)

# Build the Q algorithm instance; every setting is passed by keyword.
algorithm_instance = Q(
    environment,
    alpha=0.01,
    gamma=0.995,
    epsilon=0.1,
    lambd=0.5,
)

# Run learning for 20 episodes with rendering disabled, then plot the
# recorded steps_per_episode values.
algorithm_instance.learn(render=False, n_episodes=20)

plot_learning_stats(
    algorithm_instance.steps_per_episode,
    title='BallBeam Q(lambda)-learning',
)