Exemplo n.º 1
0
def test_connect_four():
    """Check that a ConnectFour game round-trips through its board array.

    Plays three moves, rebuilds a second game from the first game's ``board``
    with ``ConnectFour.from_numpy`` and verifies that the rebuilt game exposes
    an equal board and has exactly the same type as the original.
    """
    game = ConnectFour()
    for move in (1, 2, 3):
        game.play_move(move)
    board = game.board

    # NOTE(review): is_canonical=False presumably means the array is taken
    # as-is rather than from the current player's perspective -- confirm
    # against ConnectFour.from_numpy.
    game2 = ConnectFour.from_numpy(board, is_canonical=False)
    board2 = game2.board

    np.testing.assert_array_equal(board, board2)
    # Type objects are compared by identity; `is` is the idiomatic spelling
    # of an exact-type check (`==` on types is flagged by linters as E721).
    assert type(game2) is type(game)
Exemplo n.º 2
0
def test_mcts_search():
    """Smoke test: construct a simulation-evaluated UCT search on a new game.

    Builds a single-worker thread pool, a ``SimulationEvaluator`` and a
    ``UCTSelector``, gives both players the same evaluator/selector pair and
    constructs a ``Search`` from a fresh ``ConnectFour`` with noise disabled
    (both noise parameters are 0.0). The test makes no assertion on the
    result; it passes as long as nothing raises.
    """
    # Imports are kept local to the test, in their original order.
    from pyoaz.thread_pool import ThreadPool
    from pyoaz.search import Search, PlayerSearchProperties
    from pyoaz.selection import UCTSelector
    from pyoaz.evaluator.simulation_evaluator import SimulationEvaluator
    from pyoaz.games.connect_four import ConnectFour

    pool = ThreadPool(n_workers=1)
    simulation_evaluator = SimulationEvaluator(thread_pool=pool)
    uct_selector = UCTSelector()

    # Two entries, each wrapping the same evaluator and selector objects.
    per_player_properties = [
        PlayerSearchProperties(simulation_evaluator, uct_selector)
        for _ in range(2)
    ]

    start_position = ConnectFour()
    # The search object is bound (not discarded) so it stays alive until the
    # function returns, exactly as before.
    _ = Search(
        game=start_position,
        player_search_properties=per_player_properties,
        thread_pool=pool,
        n_concurrent_workers=1,
        n_iterations=25000,
        noise_epsilon=0.0,
        noise_alpha=0.0,
    )
Exemplo n.º 3
0
from pyoaz.games.connect_four import apply_symmetry, ConnectFour
import numpy as np

# Fixture data: two one-hot policies over seven entries, two boards reached
# by short move sequences, and a "SYM_" counterpart of each in which every
# index/move c is replaced by 6 - c.
# NOTE(review): presumably the SYM_* values are the expected results of
# apply_symmetry on the plain ones -- confirm against the tests using them.


def _game_after(*moves):
    """Return a new ConnectFour on which ``moves`` were played in order."""
    played = ConnectFour()
    for move in moves:
        played.play_move(move)
    return played


# One-hot at index 0 and at index 1.
POLICy_0 = np.array([1.0] + [0.0] * 6)
POLICy_1 = np.array([0.0, 1.0] + [0.0] * 5)

# One-hot at index 6 and at index 5 (the 6 - c counterparts).
SYM_POLICy_0 = np.array([0.0] * 6 + [1.0])
SYM_POLICy_1 = np.array([0.0] * 5 + [1.0, 0.0])

game_0 = _game_after(0)
BOARD_0 = game_0.board.copy()

game_1 = _game_after(1, 1, 3, 6)
BOARD_1 = game_1.board.copy()

sym_game_0 = _game_after(6)
SYM_BOARD_0 = sym_game_0.board.copy()

sym_game_1 = _game_after(5, 5, 3, 0)
SYM_BOARD_1 = sym_game_1.board.copy()
Exemplo n.º 4
0
def test_az_search_with_cache():
    """Smoke test: construct a search driven by a cached neural-net evaluator.

    Builds a tiny fixed-weight TensorFlow (1.x graph-mode) network exposing
    the nodes ``input``, ``value`` and ``policy``, wraps it in a ``Model``,
    and constructs a ``Search`` that uses an ``NNEvaluator`` backed by a
    ``SimpleCache`` together with an ``AZSelector``. The test makes no
    assertion on the result; it passes as long as nothing raises.

    Each variable and op of the network is defined exactly once. Defining
    ``dense_value`` / ``dense_policy`` or the ``value`` matmul a second time
    would add orphan variables and a second, auto-renamed value op to the
    graph without being used by the model.
    """
    with tf.Session() as session:

        # Neural network definition
        # Named `board_input` locally to avoid shadowing the builtin `input`;
        # the graph node is still called "input", which is what Model looks up.
        board_input = tf.placeholder(
            dtype=tf.float32, shape=[None, 6, 7, 2], name="input"
        )
        # Filter literal has shape (4, 1, 2, 1): 4x1 window, 2 input
        # channels, 1 output channel.
        conv0_filters = tf.Variable(
            [
                [[[1.0], [1.0]]],
                [[[-2.0], [-2.0]]],
                [[[3.0], [3.0]]],
                [[[4.0], [4.0]]],
            ],
            name="conv0_filters",
            dtype=tf.float32,
        )
        conv0 = tf.nn.conv2d(board_input, conv0_filters, 1, "SAME")
        # 2x2 pooling with stride 2 and SAME padding turns 6x7x1 into 3x4x1,
        # i.e. the 12 features the reshape below expects.
        max_pool0 = tf.nn.max_pool2d(
            conv0, [1, 2, 2, 1], [1, 2, 2, 1], padding="SAME"
        )
        flat = tf.reshape(max_pool0, [-1, 12], name="flat")

        # Value head: 12 features -> 1 output, all-ones weights.
        dense_value = tf.Variable([[1.0] for _ in range(12)], dtype=tf.float32)
        _ = tf.matmul(flat, dense_value, name="value")

        # Policy head: 12 features -> 7 logits, all-ones weights, then softmax.
        dense_policy = tf.Variable(
            [[1.0 for _ in range(7)] for _ in range(12)], dtype=tf.float32
        )
        policy_logits = tf.matmul(flat, dense_policy, name="policy_logits")
        _ = tf.nn.softmax(policy_logits, name="policy")

        session.run(tf.global_variables_initializer())

        # AZ search definition
        model = Model(
            session=session,
            input_node_name="input",
            value_node_name="value",
            policy_node_name="policy",
        )
        thread_pool = ThreadPool(n_workers=1)
        cache = SimpleCache(ConnectFour(), 100)
        evaluator = NNEvaluator(
            model=model,
            cache=cache,
            thread_pool=thread_pool,
            dimensions=(6, 7, 2),
            batch_size=1,
        )
        selector = AZSelector()
        # Both players share the same evaluator and selector objects.
        player_search_properties = [
            PlayerSearchProperties(evaluator, selector),
            PlayerSearchProperties(evaluator, selector),
        ]
        game = ConnectFour()
        _ = Search(
            game=game,
            player_search_properties=player_search_properties,
            thread_pool=thread_pool,
            n_concurrent_workers=1,
            n_iterations=100,
            noise_epsilon=0.25,
            noise_alpha=1,
        )
Exemplo n.º 5
0
def worker(id, pool, evaluator):
    """Announce that a thread has started, then run self-play on a new game.

    Args:
        id: Identifier printed in the start-up message. The name shadows the
            builtin but is part of the signature callers may use by keyword.
        pool: Passed through unchanged to ``self_play``.
        evaluator: Passed through unchanged to ``self_play``.
    """
    print(f"Thread {id} started")
    # A fresh ConnectFour is created after the message is printed and handed
    # straight to self_play.
    self_play(ConnectFour(), pool, evaluator)
Exemplo n.º 6
0
import random

from tqdm.auto import tqdm

from pyoaz.games.connect_four import ConnectFour

# Boards observed during random play: one entry is appended after every move.
boards = []

print("Playing random games")
for _ in tqdm(range(1000)):
    game = ConnectFour()

    # Keep playing a move drawn at random from `available_moves` until the
    # game reports that it is finished.
    while not game.finished:
        game.play_move(random.choice(game.available_moves))
        # NOTE(review): the board is stored without .copy(); this assumes
        # `game.board` yields a fresh array on each access -- confirm.
        boards.append(game.board)
print(f"Collected {len(boards)} positions")


new_boards = []

print("Playing from collected boards")

# Rebuild a game from every collected board (default from_numpy arguments)
# and keep only those that are not already finished.
games = [
    restored
    for restored in map(ConnectFour.from_numpy, boards)
    if not restored.finished
]

for game in tqdm(games, total=len(games)):
    while not game.finished:
        moves = game.available_moves