# Forwarded to AIagent_RL below; False here, so the agent is built with
# restore disabled.
restore = False

# Column counts of the empty stacks that update() allocates.
input_size = output_size = 9
# Not referenced in the visible part of this script.
max_batch_size = 10 ** 6

# Hyper-parameters: learning_rate goes to the RL agent's constructor and
# discount_factor becomes the default of update()'s ``dis`` argument.
learning_rate = 1e-3
discount_factor = 0.9
# Presumably the exploration rate used while training -- not used in the
# visible part of this script.
epsilon = 0.1

# Episode counts (presumably for the training and verification loops, which
# are not visible here).
train_episode, verify_episode = 1000, 10000

# Game environment, the learning agent and a baseline agent.
env = tictactoe()
agent = AIagent_RL(learning_rate=learning_rate, restore=restore)
agent_base = AIagent_Base()


def update(agent, batch, dis=discount_factor):
    """Process a batch of transitions for ``agent`` (snippet is truncated).

    NOTE(review): only the beginning of this function is visible; the part
    that would use ``dis``, ``x_stack``, ``y_stack`` and the next-state values
    is cut off, so the final behavior and return value cannot be stated here.

    Args:
        agent: Object exposing ``action_value.predict(state)`` and
            ``policy(state, turn, epsilon=...)``.
        batch: Iterable of 6-tuples
            ``(state, action, reward, next_state, next_turn, done)``.
        dis: Defaults to the module-level ``discount_factor``; presumably the
            discount applied to next-state values -- TODO confirm against the
            full source.
    """
    # Empty 2-D arrays with zero rows and input_size / output_size columns.
    x_stack = np.empty(0).reshape(0, input_size)
    y_stack = np.empty(0).reshape(0, output_size)

    for state, action, reward, next_state, next_turn, done in batch:

        # Current predictions for this state, flattened to a length-9 vector.
        action_value = agent.action_value.predict(state)
        action_value = np.reshape(action_value, 9)

        if not done:
            # Non-terminal transition: pick the next action with epsilon=0
            # and predict the action values of the next state.
            next_action = agent.policy(next_state, next_turn, epsilon=0)
            next_action_value = agent.action_value.predict(next_state)
# Example #2
0
import copy
from Tictactoe_Env import tictactoe
from Agent import AIagent_RL, AIagent_Base, Human_agent

# Game environment that play() resets, steps and renders.
env = tictactoe()
# agent1 is asked for the odd-numbered moves in play(), so it moves first.
# restore=True is passed to the RL agent -- presumably this loads previously
# trained parameters; confirm in Agent.
agent1 = AIagent_RL(restore=True)
# agent2 is asked for the even-numbered moves in play().
agent2 = Human_agent()


def play():
    """Play one game between ``agent1`` and ``agent2`` and print the outcome.

    The module-level ``env`` is reset, then the two agents alternate moves
    (``agent1`` on odd-numbered moves, ``agent2`` on even-numbered ones), each
    choosing with ``epsilon=0``. The board is rendered after every move. Once
    ``env.step`` reports the game as done, either ``"Draw!"`` (winner equals 0)
    or the winning agent's number is printed.
    """
    env.reset()
    board = copy.copy(env.state)
    finished = 0
    winner = 0
    move_no = 0

    while not finished:
        move_no += 1
        current_turn = copy.copy(env.turn)
        # Odd move numbers belong to agent1, even ones to agent2.
        mover = agent1 if move_no % 2 == 1 else agent2
        chosen = mover.policy(board, current_turn, epsilon=0)
        # The reward returned by the environment is not used here.
        new_board, finished, _reward, winner = env.step(chosen)
        board = copy.copy(new_board)
        env.render()

    print("Draw!" if winner == 0 else "Winner is agent %d!" % winner)
# Example #3
0
import matplotlib.pyplot as plt
import copy
import itertools
from Tictactoe_Env import tictactoe, predict, ret_turn
from Agent import AIagent_RL, AIagent_Base
from Functions import is_finished, available_actions

# Script settings; neither value is referenced in the visible part of this
# snippet.
verify_episode, discount_factor = 100, 0.9

# Environment, a learning agent built with restore=False, and a baseline
# agent.
env = tictactoe()
agent = AIagent_RL(restore=False)
agent_base = AIagent_Base()

# Three independent, initially empty series -- presumably filled for
# plotting (iteration index, value, win rate); confirm against the full
# script.
iteration_plt, v_plt, wr_plt = [], [], []


def policy_evaluation(agent):
    """Repeatedly sweep over every board encoding for ``agent`` (truncated).

    NOTE(review): only the start of this function is visible; the value
    update, the use of ``theta``/``delta`` and the loop exit are cut off, so
    the termination rule and return value cannot be stated here.

    Args:
        agent: Object exposing ``value(state)``.
    """
    # Presumably the convergence threshold compared against delta -- the
    # comparison is not visible in this snippet.
    theta = 1e-9

    while True:
        # Reset at the start of every sweep.
        delta = 0.0

        # Every assignment of a value in {0, 1, 2} to 9 cells (3**9 tuples).
        state_list = itertools.product([0, 1, 2], repeat=9)
        for state in state_list:
            state = list(state)
            done, winner = is_finished(state)
            if not done:  # except for terminal state
                # Current value of this state according to the agent.
                v = agent.value(state)
# Example #4
0
import copy
from Tictactoe_Env import tictactoe
from Agent import AIagent_RL, AIagent_Base, Human_agent

# Game environment that play() resets, steps and renders.
env = tictactoe()
# agent1 is asked for the odd-numbered moves in play(), so the human moves
# first in this script.
agent1 = Human_agent()
# agent2 is asked for the even-numbered moves. restore=True is passed to the
# RL agent -- presumably this loads previously trained parameters; confirm in
# Agent.
agent2 = AIagent_RL(restore=True)


def play():
    """Run a single game on ``env`` and report who won.

    ``agent1`` supplies every odd-numbered move and ``agent2`` every
    even-numbered one, both queried with ``epsilon=0``. After each move the
    environment is rendered. When the environment signals the end of the
    game, the winner's number is printed, or ``"Draw!"`` if the winner is 0.
    """
    env.reset()
    current_state = copy.copy(env.state)
    winner = 0
    ply = 0

    while True:
        ply += 1
        side = copy.copy(env.turn)
        if ply % 2:
            move = agent1.policy(current_state, side, epsilon=0)
        else:
            move = agent2.policy(current_state, side, epsilon=0)
        # The reward returned by the environment is not used here.
        following_state, game_over, _reward, winner = env.step(move)
        current_state = copy.copy(following_state)
        env.render()
        if game_over:
            break

    if winner != 0:
        print("Winner is agent %d!" % winner)
        return
    print("Draw!")