Exemplo n.º 1
0
def convergence_test():
    """Run Q-learning on six configurations and plot the collected curves.

    Two ``md.DeterministicModel`` instances are built (first constructor
    argument 10 with ``episode_length=20``, then 25 with
    ``episode_length=50``).  For each model, ``q_learning`` is run with the
    bound pairs (-2, 2), (-3, 3) and (-6, 6).  The second and third values
    returned by every run are gathered and handed to
    ``utils.plot_convergence`` in two separate calls, using the same labels.

    The policy from the final run is printed against the second model before
    and after the model is updated.
    """
    iterations = 20000
    second_dim = 10
    # (first constructor argument, episode length) for each model under test.
    model_setups = ((10, 20), (25, 50))
    # Each run passes -bound and +bound to q_learning; the plot labels call
    # this range the "action space".
    bounds = (2, 3, 6)
    label_template = "action space = [{}, {}], state space {}x{}"

    first_series = []   # second return value of each q_learning run
    second_series = []  # third return value of each q_learning run
    labels = []

    for first_dim, episode_length in model_setups:
        sensor = Sensor.DeterministicSensor(3)
        model = md.DeterministicModel(first_dim,
                                      second_dim,
                                      1,
                                      sensor=sensor,
                                      episode_length=episode_length)
        for bound in bounds:
            # `policy` is rebound on every run; only the last one is used
            # by the printing code below.
            policy, curve_a, curve_b = q_learning(model,
                                                  -bound,
                                                  bound,
                                                  iterations,
                                                  epsilon=0.2,
                                                  alpha=.9,
                                                  return_convergence=True)
            first_series.append(curve_a)
            second_series.append(curve_b)
            labels.append(
                label_template.format(-bound, bound, first_dim, second_dim))

    # From here on `model` is the second (25-wide) model and `policy` is the
    # result of its (-6, 6) run.
    model.reset()
    model.print_policy(policy)
    model.update(0, True)
    # NOTE(review): the model is passed to its own method here -- confirm
    # that n_update really expects the model as its first argument.
    model.n_update(model, n=5)
    model.print_policy(policy)
    print(model)
    print(policy.pi)

    # Each plot call receives its own label list, as in the unrolled version.
    utils.plot_convergence(first_series, list(labels))
    utils.plot_convergence(second_series, list(labels))