Python AIagent_RL示例

编程语言: Python

命名空间/包名称: Agent

类/类型: AIagent_RL

hotexamples.com的示例: 4

Python AIagent_RL - 共找到 4 个示例。以下是从开源项目中提取的、评价最高的 Agent.AIagent_RL 真实 Python 用法示例。您可以为示例评分，帮助我们提高示例质量。

常用方法

显示隐藏

AIagent_RL(4)

常用方法

AIagent_RL (4)

示例#1

显示文件

文件： Train.py 项目： sjy366/Tensorflow-Q-learning-Tic-tac-toe-AI

# Forwarded to AIagent_RL(restore=...) below; presumably toggles reloading a
# previously saved model -- confirm against the Agent module.
restore = False

# Row widths used by update() when it builds its empty x/y stacks.
input_size = 9
output_size = 9
max_batch_size = 1000000  # not referenced in the visible part of this excerpt

learning_rate = 0.001  # forwarded to AIagent_RL(learning_rate=...)
discount_factor = 0.9  # default value of the `dis` parameter of update()
epsilon = 0.1  # not referenced in the visible part; presumably the exploration rate -- confirm

# Episode counts; the code that consumes them is not visible in this excerpt.
train_episode = 1000
verify_episode = 10000

# Environment and agents. tictactoe, AIagent_RL and AIagent_Base are not
# imported in the visible excerpt (the import lines are not shown).
env = tictactoe()
agent = AIagent_RL(learning_rate=learning_rate, restore=restore)
agent_base = AIagent_Base()


def update(agent, batch, dis=discount_factor):
    x_stack = np.empty(0).reshape(0, input_size)
    y_stack = np.empty(0).reshape(0, output_size)

    for state, action, reward, next_state, next_turn, done in batch:

        action_value = agent.action_value.predict(state)
        action_value = np.reshape(action_value, 9)

        if not done:
            next_action = agent.policy(next_state, next_turn, epsilon=0)
            next_action_value = agent.action_value.predict(next_state)

示例#2

显示文件

import copy
from Tictactoe_Env import tictactoe
from Agent import AIagent_RL, AIagent_Base, Human_agent

# Shared game environment and the two players used by play().
# agent1 (the AIagent_RL built with restore=True) takes the odd-numbered
# moves in play(); agent2 (the Human_agent) takes the even-numbered ones.
env = tictactoe()
agent1 = AIagent_RL(restore=True)
agent2 = Human_agent()


def play():
    """Play one game between ``agent1`` and ``agent2`` and print the result.

    The module-level ``env`` is reset, then the two agents alternate moves
    (``agent1`` takes the odd-numbered moves, beginning with the first)
    until ``env.step`` reports the game as done. The board is rendered after
    every move, and the outcome is printed once the loop ends.
    """
    finished = 0
    victor = 0
    env.reset()
    board = copy.copy(env.state)

    move_no = 0
    while not finished:
        move_no += 1
        side = copy.copy(env.turn)
        # Odd moves belong to agent1, even moves to agent2.
        mover = agent1 if move_no % 2 == 1 else agent2
        # Every policy call is made with epsilon=0.
        chosen = mover.policy(board, side, epsilon=0)
        new_board, finished, _reward, victor = env.step(chosen)
        board = copy.copy(new_board)
        env.render()

    # A winner value of 0 is reported as a draw.
    outcome = "Draw!" if victor == 0 else "Winner is agent %d!" % victor
    print(outcome)

示例#3

显示文件

import matplotlib.pyplot as plt
import copy
import itertools
from Tictactoe_Env import tictactoe, predict, ret_turn
from Agent import AIagent_RL, AIagent_Base
from Functions import is_finished, available_actions

# Neither constant is referenced in the visible part of this excerpt.
verify_episode = 100
discount_factor = 0.9

# Environment and agents; AIagent_RL is built with restore=False here.
env = tictactoe()
agent = AIagent_RL(restore=False)
agent_base = AIagent_Base()

# Start empty; presumably filled with series to plot via matplotlib
# (imported above as plt) -- confirm where they are appended to.
iteration_plt = []
v_plt = []
wr_plt = []


def policy_evaluation(agent):
    theta = 1e-9

    while True:
        delta = 0.0

        state_list = itertools.product([0, 1, 2], repeat=9)
        for state in state_list:
            state = list(state)
            done, winner = is_finished(state)
            if not done:  # except for terminal state
                v = agent.value(state)

示例#4

显示文件

import copy
from Tictactoe_Env import tictactoe
from Agent import AIagent_RL, AIagent_Base, Human_agent

# Shared game environment and the two players used by play().
# agent1 (the Human_agent) takes the odd-numbered moves in play();
# agent2 (the AIagent_RL built with restore=True) takes the even-numbered ones.
env = tictactoe()
agent1 = Human_agent()
agent2 = AIagent_RL(restore=True)


def play():
    """Run a single game on ``env`` and print who won.

    ``agent1`` and ``agent2`` take turns choosing actions, with ``agent1``
    making the first move. Each action is applied with ``env.step`` and the
    board is rendered afterwards; the loop ends as soon as ``env.step``
    reports the game as done. The outcome is then printed.
    """
    game_over = 0
    champion = 0
    env.reset()
    current_state = copy.copy(env.state)

    ply = 0
    while True:
        ply += 1
        current_turn = copy.copy(env.turn)

        # agent1 plays on odd plies, agent2 on even plies.
        if ply % 2 != 1:
            move = agent2.policy(current_state, current_turn, epsilon=0)
        else:
            move = agent1.policy(current_state, current_turn, epsilon=0)

        step_result = env.step(move)
        following_state, game_over, _reward, champion = step_result
        current_state = copy.copy(following_state)
        env.render()

        if game_over:
            break

    # A winner value of 0 is reported as a draw.
    if champion == 0:
        print("Draw!")
        return
    print("Winner is agent %d!" % champion)