def train(self, total_games=500):
    """Train a fresh Q-learning agent by playing *total_games* games.

    Relies on module-level ``game``, ``pygame`` and ``QLearningAgent``
    (defined elsewhere in this file/module).

    :param total_games: number of complete games to play before saving.
    :returns: the trained ``QLearningAgent`` (its policy is also persisted
        via ``save_policy()``).
    """
    # Hyper-parameters are fixed here; epsilon starts fully exploratory.
    learner = QLearningAgent(
        epsilon=1,
        fixed_epsilon=None,
        alpha=0.5,
        gamma=0.9,
        total_games=total_games,
    )
    for _ in range(total_games):
        last_action = None  # kept for parity with game.play's return value
        while True:
            if game.game_results() is not None:
                # Game finished: show the final frame, then reset the board
                # for the next iteration of the outer loop.
                pygame.display.flip()
                game.reset()
                game.initialize()
                break
            last_action = game.play(learner)
            pygame.display.flip()
    learner.save_policy()
    return learner
from tqdm import tqdm


def q(text=''):
    """Debug helper: print *text* between ``>`` and ``<`` markers, then exit."""
    print(f'>{text}<')
    # NOTE(review): relies on `sys` being imported elsewhere in the file — confirm
    sys.exit()


from environment import TicTacToe
from agent import QLearningAgent, Hoooman
import config as cfg
from config import display_board

# initializing the TicTacToe environment and a QLearningAgent
# (the master Tic-Toc-Toc player, your opponent!)
env = TicTacToe()
player1 = QLearningAgent(name=cfg.playerX_QLearningAgent_name)
player1.loadQtable()  # load the learnt Q-Table
player1.epsilon = 0.0  # greedy actions only, 0 exploration

# initializing the agent class that lets you, the human user, take the actions in the game
player2 = Hoooman()

# replay decides whether to rematch or not, at the end of a game
replay = True
while replay:
    done = False  # the episode goes on as long as done is False
    # deciding which player makes a move first
    # NOTE(review): `random` is presumably imported elsewhere in the file — confirm
    playerID = random.choice([True, False])  # True means player1
    # ... loop body continues beyond this chunk
if __name__ == '__main__': episode = 30 # 训练多少回合 epsilon = 0.8 # 使用历史经验的概率, 若值为0.9,则有 90% 的情况下,会根据历史经验选择 action, 10% 的情况下,随机选择 action learning_rate = 0.01 # 根据公式可知,该值越大,则旧训练数据被保留的就越少 discount_factor = 0.9 # from maze_game.game import startGame env = startGame() key = input('Do you want to see the training process? [y] ') key = key == 'y' or key == '' agent = QLearningAgent( epsilon=epsilon, learning_rate=learning_rate, discount_factor=discount_factor, actions=Game.DIRECTION.ACTIONS ) successful_step_counter_arr = [] failed_step_counter_arr = [] if key: env.display() for eps in range(1, episode + 1): cur_state = env.reset() step_counter = 0 while True: step_counter += 1
import numpy as np
import pandas as pd
from env import TicTacToeEnv
from agent import QLearningAgent

# Train a Q-learning agent on the TicTacToe environment by self-play.
env = TicTacToeEnv()
agent = QLearningAgent(env)
for game_nr in range(1000000):
    if game_nr % 10000 == 0:
        print(game_nr)  # coarse progress indicator
    done = False
    # .copy() so later environment mutations don't alias the stored state
    s = env.reset().copy()
    # print('Init', s)
    while not done:
        a = agent.take_action(s)
        r, s_, done, _ = env.step(a)
        agent.learn(s, a, r, s_, done)
        # print(s, a, r, s_, done)
        s = s_.copy()

# Export the learnt value table (V) and visit counts (N) for inspection.
# NOTE(review): reaches into the agent's private `_V`/`_N` dicts — assumes
# they map state keys to scalars; confirm against the agent implementation.
V = pd.DataFrame.from_dict(agent._V, orient='index', dtype=np.float32, columns=['V'])
N = pd.DataFrame.from_dict(agent._N, orient='index', dtype=np.uint32, columns=['N'])
# Left-merge so every valued state is kept even without a visit count.
df = V.merge(N, how='left', left_index=True, right_index=True)
# Expand the (tuple-like) state index into one column per board cell.
states = pd.DataFrame(df.index.values.tolist(), index=df.index)