Python QLearning.learn примеры использования

Язык программирования: Python

Пространство имен/Пакет: QLearning

Класс/Тип: QLearning

Метод/Функция: learn

Примеров на hotexamples.com: 2

Python QLearning.learn — найдено 2 примера. Это лучшие примеры кода на Python для QLearning.QLearning.learn, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

QLearning(30)

train(6)

run(3)

evaluateOneEpisode(2)

learnOneEpisode(2)

learn(2)

DebugTrainNet(1)

update_q(1)

updateQFunction(1)

test(1)

setterExploration(1)

save(1)

q_learning_until_convergence(1)

max_q_action(1)

learning(1)

intiQtable(1)

getterTables(1)

initQ(1)

getterExploration(1)

get_action(1)

format_q_function(1)

format_policy(1)

exploration_pure(1)

exploration_infaillible(1)

exploration_exigente(1)

exploitationFromTable(1)

exploitation(1)

execute_policy(1)

execute(1)

epsilonGreedy(1)

StartTrial(1)

update_q_table(1)

Пример #1

Показать файл

Файл: Experiments.py Проект: shaoguangji/CS449

def experiment(test_game,
               num_experiments,
               file_name,
               num_episodes=500,
               alpha=.99,
               gamma=.9,
               epsilon=.9,
               decay_rate=.99,
               num_eval_runs=100):
    """
    Run repeated Q-Learning experiments, plot the averaged learning curve and
    evaluate the last learned policy.

    A fresh ``QLearning`` model is trained ``num_experiments`` times. After each
    training run the learner's ``num_moves_per_episode`` is collected; these
    per-episode move counts are averaged element-wise across experiments and
    handed to ``generate_validation_curves`` together with ``file_name``.

    The policy produced by the last experiment is then executed
    ``num_eval_runs`` times on a newly constructed learner, and element ``[1]``
    of each ``execute_policy`` result is averaged.
    NOTE(review): element ``[1]`` is presumably the number of steps taken to
    reach the goal -- confirm against ``QLearning.execute_policy``.

    Args:
        test_game: Game/environment object forwarded to ``QLearning``.
        num_experiments: Number of independent training runs to average over.
        file_name: Forwarded to ``generate_validation_curves`` as ``file_name``.
        num_episodes: Forwarded to ``QLearning``; also the length of the
            x-axis passed to the plotting helper.
        alpha: Forwarded to ``QLearning``.
        gamma: Forwarded to ``QLearning``.
        epsilon: Forwarded to ``QLearning``.
        decay_rate: Forwarded to ``QLearning``.
        num_eval_runs: How many times the final policy is executed when
            computing the average (defaults to 100, the previously hard-coded
            value).

    Returns:
        A tuple ``(avg_num_steps, policy)`` where ``policy`` is the value
        returned by ``learn()`` in the last experiment.

    Raises:
        ValueError: If ``num_eval_runs`` is smaller than 1.
    """
    if num_eval_runs < 1:
        raise ValueError("num_eval_runs must be at least 1")

    # Both the training learners and the evaluation learner are built with
    # exactly the same hyper-parameters.
    learner_kwargs = dict(num_episodes=num_episodes,
                          alpha=alpha,
                          gamma=gamma,
                          epsilon=epsilon,
                          decay_rate=decay_rate)

    list_of_moves_per_experiment = []
    policies = []
    for _ in range(num_experiments):
        # Learn model
        q_learning = QLearning(test_game, **learner_kwargs)
        policies.append(q_learning.learn())
        list_of_moves_per_experiment.append(q_learning.num_moves_per_episode)

    # Element-wise average of the per-episode move counts over all experiments.
    moves_matrix = np.asarray(list_of_moves_per_experiment)
    moves_per_epoc_number = moves_matrix.sum(axis=0) / num_experiments

    # Get the average of execute_policy(...)[1] when running the final policy.
    final_policy = policies[-1]
    q_learning = QLearning(test_game, **learner_kwargs)
    total_steps = 0
    for _ in range(num_eval_runs):
        total_steps += q_learning.execute_policy(final_policy)[1]
    avg_num_steps = total_steps / float(num_eval_runs)

    generate_validation_curves(np.arange(num_episodes),
                               moves_per_epoc_number,
                               None,
                               "Number of steps",
                               None,
                               x_axis_label="Epoc Number",
                               y_axis_label="Average Path Length",
                               file_name=file_name)

    return avg_num_steps, final_policy

Пример #2

Показать файл

Файл: run.py Проект: chrisjtan/GridWorld

from environment import Env
from QLearning import QLearning

if __name__ == "__main__":
    # Set up the environment and an agent that can choose among all of the
    # environment's action indices.
    env = Env()
    agent = QLearning(list(range(env.n_actions)))

    for _episode in range(1000):
        state = env.reset()
        done = False

        # Keep stepping until the environment reports the episode is over.
        while not done:
            env.render()

            # Pick an action for the current state and advance the
            # environment by one step.
            action = agent.get_action(str(state))
            next_state, reward, done = env.step(action)

            # Update the agent from the sampled transition <s, a, r, s'>.
            agent.learn(str(state), action, reward, str(next_state))

            state = next_state
            env.print_value_all(agent.q_table)