# Repeated-run learning experiment: CMACSARSA on the BallBeam environment.
#
# The script trains a fresh CMACSARSA instance `n_repeats` times for
# `n_episodes` episodes each, stores every run's `steps_per_episode` as one row
# of a (n_repeats, n_episodes) matrix and hands that matrix to
# `plot_learning_stats`.
#
# numpy is imported explicitly because the script calls `np.zeros`; relying on
# the star import below to leak `np` into this namespace is fragile.
import numpy as np

from rltoolbox.algorithm.cmac import *
from rltoolbox.environment.continuous import BallBeam
from rltoolbox.approximator import CMACApproximator
from rltoolbox.misc import compare_learning_curves, plot_learning_stats

if __name__ == "__main__":
    algorithm = CMACSARSA

    environment = BallBeam(max_steps=10000)
    environment.approximate_with(CMACApproximator, n_layers=4)

    n_episodes = 10
    n_repeats = 5

    # Hyperparameters forwarded verbatim to the algorithm constructor.
    # NOTE(review): the name suggests these come from a reference text - confirm.
    book_parameters = {
        'alpha': 1,
        'gamma': 0.995,
        'epsilon': 0.1,
        'lambd': 0.0
    }

    # One row per repeat, one column per episode.
    learning_curves = np.zeros((n_repeats, n_episodes))

    for i in range(n_repeats):
        # NOTE(review): every repeat reuses the same `environment` object (and
        # whatever approximator state is attached to it) - confirm that the
        # algorithm constructor resets learned state between repeats.
        alg_instance = algorithm(environment, **book_parameters)
        alg_instance.learn(n_episodes, render=False)
        learning_curves[i, :] = alg_instance.steps_per_episode

    plot_learning_stats(learning_curves, 'BallBeam CMACSARSA learning', savefig=False)
#!/usr/bin/env python
# Repeated-run learning experiment: FR algorithm on the BallBeam environment
# with a FuzzyApproximator.
#
# Each repeat builds a new FR instance, lets it learn for `n_episodes`
# episodes and copies its `steps_per_episode` into one row of the result
# matrix, which is finally passed to `plot_learning_stats`.
import numpy as np

from rltoolbox.algorithm.fuzzy import *
from rltoolbox.environment.continuous import BallBeam
from rltoolbox.approximator import FuzzyApproximator
from rltoolbox.misc import compare_learning_curves, plot_learning_stats

if __name__ == "__main__":
    n_episodes = 5
    n_repeats = 5

    # Keyword arguments handed unchanged to the FR constructor.
    hyperparams = {
        'alpha': 0.1,
        'beta': 0.01,
        'epsilon': 0.1,
        'lambd': 0.5,
    }

    environment = BallBeam(max_steps=10000)
    environment.approximate_with(FuzzyApproximator)

    curves = np.zeros((n_repeats, n_episodes))

    # Iterating a 2-D array yields row views, so writing into `run_curve[:]`
    # fills the corresponding row of `curves` in place.
    for run_curve in curves:
        learner = FR(environment, **hyperparams)
        learner.learn(n_episodes, render=False)
        run_curve[:] = learner.steps_per_episode

    plot_learning_stats(curves, 'BallBeam FR learning')
#!/usr/bin/env python
# Repeated-run learning experiment: FQ algorithm on the BallBeam environment
# with a FuzzyApproximator.
#
# Runs `N_REPEATS` trainings of `N_EPISODES` episodes each, collects every
# run's `steps_per_episode` in a (repeats x episodes) matrix and plots it via
# `plot_learning_stats` with `savefig=False`.
import numpy as np

from rltoolbox.algorithm.fuzzy import *
from rltoolbox.environment.continuous import BallBeam
from rltoolbox.approximator import FuzzyApproximator
from rltoolbox.misc import compare_learning_curves, plot_learning_stats

if __name__ == "__main__":
    N_EPISODES = 5
    N_REPEATS = 5

    agent_cls = FQ

    # Constructor keyword arguments for the agent.
    agent_kwargs = {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.995, 'lambd': 0.5}

    ball_beam = BallBeam(max_steps=10000)
    ball_beam.approximate_with(FuzzyApproximator)

    # Row r holds the steps-per-episode curve of repeat r.
    steps_matrix = np.zeros((N_REPEATS, N_EPISODES))

    for repeat in range(N_REPEATS):
        agent = agent_cls(ball_beam, **agent_kwargs)
        agent.learn(N_EPISODES, render=False)
        steps_matrix[repeat] = agent.steps_per_episode

    plot_learning_stats(steps_matrix, 'BallBeam FQ learning', savefig=False)
#!/usr/bin/env python
# Single-run learning experiment: Q algorithm on the BallBeam environment with
# a TableApproximator.
#
# The script runs at module level (no `__main__` guard): it trains one Q
# instance for 20 episodes and plots its `steps_per_episode`.
from rltoolbox.algorithm.classic import Q
from rltoolbox.environment.continuous import BallBeam
from rltoolbox.approximator import TableApproximator
from rltoolbox.misc import plot_learning_stats

# Environment setup.
environment = BallBeam(max_steps=10000)
environment.approximate_with(TableApproximator)

# Keyword arguments for the Q constructor, kept in the original order.
q_settings = {
    'alpha': 0.01,
    'lambd': 0.5,
    'epsilon': 0.1,
    'gamma': 0.995,
}

algorithm_instance = Q(environment, **q_settings)
algorithm_instance.learn(n_episodes=20, render=False)

# Plot the recorded number of steps for each episode of the run.
steps_history = algorithm_instance.steps_per_episode
plot_learning_stats(steps_history, title='BallBeam Q(lambda)-learning')