Пример #1
0
def experiment(test_game,
               num_experiments,
               file_name,
               num_episodes=500,
               alpha=.99,
               gamma=.9,
               epsilon=.9,
               decay_rate=.99):
    """
    Run repeated Q-Learning experiments, plot the averaged learning curve
    and evaluate the last learned policy.

    A fresh ``QLearning`` learner is trained ``num_experiments`` times. After
    each run its ``num_moves_per_episode`` record is collected; the records
    are averaged element-wise across runs and handed to
    ``generate_validation_curves`` together with ``file_name``.

    The policy returned by the final training run is then executed 100 times
    on a new ``QLearning`` instance, and element ``[1]`` of each
    ``execute_policy`` result is averaged.

    :param test_game: game/environment object forwarded to ``QLearning``.
    :param num_experiments: number of independent training runs (>= 1).
    :param file_name: forwarded to ``generate_validation_curves``.
    :param num_episodes: forwarded to ``QLearning``; also the length of the
        x-axis (``np.arange(num_episodes)``) of the generated curve.
    :param alpha: forwarded to ``QLearning``.
    :param gamma: forwarded to ``QLearning``.
    :param epsilon: forwarded to ``QLearning``.
    :param decay_rate: forwarded to ``QLearning``.
    :return: tuple ``(avg_num_steps, policy)`` where ``policy`` is the result
        of ``learn()`` from the last training run.
    :raises ValueError: if ``num_experiments`` is smaller than 1.
    """
    # Fail fast: with no training run there is no policy to evaluate, and the
    # code below would otherwise only fail later with an opaque IndexError.
    if num_experiments < 1:
        raise ValueError(
            "num_experiments must be at least 1, got {!r}".format(num_experiments))

    # Number of times the final policy is executed to estimate its path length.
    num_eval_runs = 100

    # Every learner (training and evaluation) uses the same hyper-parameters.
    learner_kwargs = dict(num_episodes=num_episodes,
                          alpha=alpha,
                          gamma=gamma,
                          epsilon=epsilon,
                          decay_rate=decay_rate)

    list_of_moves_per_experiment = []
    policies = []
    for _ in range(num_experiments):
        # Learn model
        q_learning = QLearning(test_game, **learner_kwargs)
        policies.append(q_learning.learn())
        list_of_moves_per_experiment.append(q_learning.num_moves_per_episode)

    # Element-wise average over the experiments (axis 0 indexes the runs).
    moves_per_epoc_number = np.mean(np.asarray(list_of_moves_per_experiment),
                                    axis=0)

    # Get the average number of steps when executing the last learned policy.
    final_policy = policies[-1]
    q_learning = QLearning(test_game, **learner_kwargs)
    total_steps = 0
    for _ in range(num_eval_runs):
        # Index 1 of the execute_policy result is the value being averaged.
        total_steps += q_learning.execute_policy(final_policy)[1]
    avg_num_steps = total_steps / float(num_eval_runs)

    generate_validation_curves(np.arange(num_episodes),
                               moves_per_epoc_number,
                               None,
                               "Number of steps",
                               None,
                               x_axis_label="Epoc Number",
                               y_axis_label="Average Path Length",
                               file_name=file_name)

    return avg_num_steps, final_policy
Пример #2
0
from environment import Env
from QLearning import QLearning

if __name__ == "__main__":
    # Script entry point: train a tabular Q-learning agent on the environment
    # for a fixed number of episodes, rendering every step.
    env = Env()
    QL = QLearning(list(range(env.n_actions)))

    for episode in range(1000):
        state = env.reset()
        done = False

        # Keep stepping until the environment reports the episode is over.
        while not done:
            env.render()

            # Pick an action for the current state and advance the
            # environment by one step.
            action = QL.get_action(str(state))
            next_state, reward, done = env.step(action)

            # Update the Q function from the sample <s, a, r, s'>.
            QL.learn(str(state), action, reward, str(next_state))

            # Move on to the successor state and display the current table.
            state = next_state
            env.print_value_all(QL.q_table)