def print_q_values(self):
    print(f"num q_values = {len(self.qtable.cache)}")
    for k, v in self.qtable.cache.items():
        b = np.frombuffer(k, dtype=int)
        board = Board(b)
        board.print_board()
        print(f"qvalue = {v}")
def test_get_game_result_not_over():
    b = np.array([[1, 1, -1], [0, -1, 0], [1, -1, 1]]).flatten()

    board = Board(b)

    result = board.get_game_result()

    assert result == RESULT_NOT_OVER
def test_get_game_result_o_wins():
    b = np.array([[1, 0, -1], [0, -1, 1], [-1, 0, 1]]).flatten()

    board = Board(b)

    result = board.get_game_result()

    assert result == RESULT_O_WINS
def test_get_game_result_draw():
    b = np.array([[1, 1, -1], [-1, -1, 1], [1, -1, 1]]).flatten()

    board = Board(b)

    result = board.get_game_result()

    assert result == RESULT_DRAW
def test_get_position_value_from_cache():
    b = np.array([[1, 0, 0], [1, -1, 1], [0, 0, -1]]).flatten()

    value, found = cache.get_for_position(Board(b))

    assert (value, found) == (None, False)

    cache.set_for_position(Board(b), -1)

    (value, _), found = cache.get_for_position(Board(b))

    assert (value, found) == (-1, True)
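For context, a minimal sketch of the cache interface these assertions exercise, assuming positions are keyed by their raw byte representation. A successful lookup returns the stored value together with the symmetry transform relating the cached orientation to the queried one; this illustrative SimpleBoardCache omits the symmetry search the real cache performs.

class SimpleBoardCache:
    def __init__(self):
        self.cache = {}

    def set_for_position(self, board, value):
        # Key the entry on the board's byte representation.
        self.cache[board.board.tobytes()] = value

    def get_for_position(self, board):
        key = board.board.tobytes()
        if key in self.cache:
            # None stands in for the symmetry transform that the real
            # cache returns alongside the stored value.
            return (self.cache[key], None), True
        return None, False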
def test_play_minimax_move_o_wins_in_best_case():
    b = np.array([[1, 0, 0], [1, -1, 1], [0, 0, -1]]).flatten()

    result = play_minimax_move(Board(b)).board

    assert np.array_equal(
        result,
        np.array([[1, 0, 0], [1, -1, 1], [-1, 0, -1]]).flatten())
Example #7
def test_choose_move_index_with_transformation():
    b_2d = np.array([[1, 0, 0], [1, -1, 1], [-1, 1, -1]])

    b = b_2d.flatten()

    board = Board(b)

    q_table = QTable()
    q_table.update_q_value(board, 1, -1)
    q_table.update_q_value(board, 2, 1)

    b_transformed = np.rot90(b_2d, 2).flatten()

    board_transformed = Board(b_transformed)

    move_index = choose_move_index([q_table], board_transformed, 0)

    assert move_index == 6
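Why index 6: a 180-degree rotation of a 3x3 board maps flat index i to 8 - i, so the best move recorded at index 2 shows up at index 6 in the rotated position. A quick standalone check:

import numpy as np

indexes = np.arange(9).reshape(3, 3)
# After a 180-degree rotation, flat position 6 holds the original index 2.
assert np.rot90(indexes, 2).flatten()[6] == 2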
def transform_board_and_qvalues(board, q_values, transform):
    b_2d_transformed = transform.transform(board.board_2d)

    q_2d = load_q_values_into_2d_board(q_values)
    q_2d_transformed = transform.transform(q_2d)
    q_values_transformed = {
        mi: qv for mi, qv in enumerate(q_2d_transformed.flatten())
        if not np.isnan(qv)
    }

    return Board(b_2d_transformed.flatten()), q_values_transformed
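transform_board_and_qvalues relies on load_q_values_into_2d_board; a plausible sketch, assuming move indexes are flat 0-8 positions on the 3x3 board and NaN marks squares with no q-value, so they survive the geometric transform and are filtered out above:

import numpy as np

def load_q_values_into_2d_board(q_values):
    q_2d = np.full((3, 3), np.nan)
    for move_index, q_value in q_values.items():
        q_2d[np.unravel_index(move_index, (3, 3))] = q_value
    return q_2d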
def test_convert_to_tensor():
    b = np.array([[1, 0, 0], [1, -1, 1], [-1, 1, -1]]).flatten()

    board = Board(b)

    t = convert_to_tensor(board)

    t_expected = torch.tensor([1., 0., 0., 1., -1., 1., -1., 1., -1.])
    assert torch.all(torch.eq(t_expected, t))
Example #10
def test_choose_move_index_2nd_move():
    b = np.array([[1, 0, 0], [1, -1, 1], [-1, 1, -1]]).flatten()

    board = Board(b)

    q_table = QTable()
    q_table.update_q_value(board, 1, 0.5)
    q_table.update_q_value(board, 2, 1)

    action_index = choose_move_index([q_table], board, 0)

    assert action_index == 2
    def update_q_value(self, board, move_index, qvalue):
        new_position = board.play_move(move_index)

        # A cache hit returns the stored (value, transform) pair; the
        # transform maps the queried position onto the orientation already
        # stored in the cache, so the update lands on the existing entry.
        result, found = self.qtable.get_for_position(new_position)
        if found:
            _, t = result
            new_position_transformed = Board(
                t.transform(new_position.board_2d).flatten())
            self.qtable.set_for_position(new_position_transformed, qvalue)
            return

        self.qtable.set_for_position(new_position, qvalue)
Example #12
def test_get_q_values_initial_o_turn():
    b = np.array([[1, 0, -1],
                  [1, 0, -1],
                  [1, 0,  0]]).flatten()

    q_table = QTable()

    q_values = q_table.get_q_values(Board(b))

    expected_q_values = {1: INITIAL_Q_VALUES_FOR_O, 4: INITIAL_Q_VALUES_FOR_O,
                         7: INITIAL_Q_VALUES_FOR_O, 8: INITIAL_Q_VALUES_FOR_O}

    assert q_values == expected_q_values
Example #13
def test_update_q_value():
    qtable = QTable()

    b_2d = np.array([[1.0,  0.0,  0.0],
                     [1.0, -1.0,  0.0],
                     [0.0,  1.0, -1.0]])
    b = b_2d.flatten()

    board = Board(b)

    qvalues = qtable.get_q_values(board)

    init = INITIAL_Q_VALUES_FOR_O

    expected_qvalues = {1: init, 2: init, 5: init, 6: init}

    assert qvalues == expected_qvalues

    b_rot90_flipud_2d = np.flipud(np.rot90(b_2d))
    b_rot90_flipud = b_rot90_flipud_2d.flatten()

    board_rot90_flipud = Board(b_rot90_flipud)

    qtable.update_q_value(board_rot90_flipud, 2, 0.8)
    qtable.update_q_value(board_rot90_flipud, 7, 0.7)

    assert len(qtable.qtable.cache) == 2

    expected_qvalues = {1: init, 2: init, 5: 0.7, 6: 0.8}

    qvalues = qtable.get_q_values(board)

    assert qvalues == expected_qvalues

    expected_qvalues = {2: 0.8, 3: init, 6: init, 7: 0.7}

    qvalues = qtable.get_q_values(board_rot90_flipud)

    assert qvalues == expected_qvalues
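Incidentally, np.flipud(np.rot90(m)) is just the transpose of m, which makes the index correspondence in the assertions above easy to verify directly:

import numpy as np

indexes = np.arange(9).reshape(3, 3)
transposed = np.flipud(np.rot90(indexes)).flatten()
# Transformed index 2 lands on original index 6, and 7 on 5, matching the
# q-values of 0.8 and 0.7 seen from both orientations.
assert transposed[2] == 6 and transposed[7] == 5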
def test_play_qneural_move():
    net = TicTacNet()
    target_net = TicTacNet()
    sgd = torch.optim.SGD(net.parameters(), lr=0.1, weight_decay=0)
    loss_function = MSELoss()
    net_context = NetContext(net, target_net, sgd, loss_function)

    play = create_qneural_player(net_context)

    b = np.array([[1, 0, 0], [1, -1, 1], [-1, 1, -1]]).flatten()

    board = Board(b)

    updated_board = play(board)

    assert np.array_equal(updated_board.board,
                          np.array([1, -1, 0, 1, -1, 1, -1, 1, -1]))
Example #15
def test_get_move_average_q_value_pairs():
    qtable_a = QTable()
    qtable_b = QTable()

    b_2d = np.array([[1, 0, 0], [1, -1, 1], [-1, 1, -1]])

    b = b_2d.flatten()

    board = Board(b)

    qtable_a.update_q_value(board, 1, 0.0)
    qtable_a.update_q_value(board, 2, 1.0)

    qtable_b.update_q_value(board, 1, -0.5)
    qtable_b.update_q_value(board, 2, 0.5)

    pairs = get_move_average_q_value_pairs([qtable_a, qtable_b], board)

    assert pairs == [(1, -0.25), (2, 0.75)]
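For reference, the averaging these assertions imply can be written as a short sketch; get_move_average_q_value_pairs_sketch is a hypothetical stand-in, assuming every QTable reports q-values for the same set of valid moves:

def get_move_average_q_value_pairs_sketch(q_tables, board):
    q_values_per_table = [q_table.get_q_values(board) for q_table in q_tables]
    move_indexes = sorted(q_values_per_table[0])
    return [(mi, sum(q[mi] for q in q_values_per_table) / len(q_tables))
            for mi in move_indexes]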
import numpy as np
import torch
from torch.nn import MSELoss

# Board and play_random_move are assumed to live in tictac.board here.
from tictac.board import Board, play_random_move
from tictac.minimax import create_minimax_player
from tictac.qneural import (TicTacNet, NetContext, create_qneural_player,
                            get_q_values, play_training_games_x,
                            play_training_games_o)

play_minimax_move_randomized = create_minimax_player(True)
play_minimax_move_not_randomized = create_minimax_player(False)

policy_net = TicTacNet()
target_net = TicTacNet()
sgd = torch.optim.SGD(policy_net.parameters(), lr=0.1)
loss = MSELoss()
net_context = NetContext(policy_net, target_net, sgd, loss)

with torch.no_grad():
    board = Board(np.array([1, -1, -1, 0, 1, 1, 0, 0, -1]))
    q_values = get_q_values(board, net_context.target_net)
    print(f"Before training q_values = {q_values}")

print("Training qlearning X vs. random...")
play_training_games_x(net_context=net_context, o_strategies=[play_random_move])
print("Training qlearning O vs. random...")
play_training_games_o(net_context=net_context, x_strategies=[play_random_move])

print("")

with torch.no_grad():
    play_qneural_move = create_qneural_player(net_context)

    print("Playing qneural vs random:")
    print("--------------------------")
def test_get_random_valid_move():
    b = np.array([0, -1, 0, 0, -1, 0, 1, 0, 1])

    move = Board(b).get_random_valid_move_index()

    assert move in [0, 2, 3, 5, 7]
def test_get_position_value_o_wins():
    b = np.array([[1, 0, -1], [1, 0, -1], [0, 1, -1]]).flatten()

    value = get_position_value(Board(b))

    assert value == RESULT_O_WINS
def test_get_position_value_draw_is_best_case():
    b = np.array([[1, -1, 0], [1, 1, -1], [-1, 1, -1]]).flatten()

    value = get_position_value(Board(b))

    assert value == RESULT_DRAW
def test_get_position_value_o_wins_in_best_case_o_turn():
    b = np.array([[1, 0, 0], [1, -1, 1], [0, 0, -1]]).flatten()

    value = get_position_value(Board(b))

    assert value == RESULT_O_WINS
def test_get_move_value_pairs_for_position_o_wins_in_best_case():
    b = np.array([[1, 0, 0], [1, -1, 1], [0, 0, -1]]).flatten()

    move_value_pairs = get_move_value_pairs(Board(b))

    assert move_value_pairs == [(1, 1), (2, 1), (6, -1), (7, 1)]
Example #22
def test_play_training_game_x_player():
    q_table = QTable()
    move_history = deque()
    q_table_player = CELL_X
    x_strategy = create_training_player([q_table], move_history, 0)
    o_strategy = play_random_move

    play_training_game([q_table], move_history, q_table_player, x_strategy,
                       o_strategy, 0.9, 1)

    init = INITIAL_Q_VALUES_FOR_X
    first_board = np.copy(new_board)

    val = 0.9 * 0.81
    expected_move_indexes_and_q_values = {0: val,  1: init, 2: val,
                                          3: init, 4: init, 5: init,
                                          6: val,  7: init, 8: val}

    move_indexes_and_q_values = q_table.get_q_values(Board(first_board))

    assert move_indexes_and_q_values == expected_move_indexes_and_q_values

    second_board = np.copy(first_board)
    second_board[0] = CELL_X
    second_board[7] = CELL_O

    val = 0.9 * 0.9
    expected_move_indexes_and_q_values = {1: val, 2: init,
                                          3: init, 4: init, 5: init,
                                          6: init, 8: init}

    move_indexes_and_q_values = q_table.get_q_values(Board(second_board))

    assert move_indexes_and_q_values == expected_move_indexes_and_q_values

    third_board = np.copy(second_board)
    third_board[1] = CELL_X
    third_board[5] = CELL_O

    val = 0.9 * 1.0
    expected_move_indexes_and_q_values = {2: val,
                                          3: init, 4: init,
                                          6: init, 8: init}

    move_indexes_and_q_values = q_table.get_q_values(Board(third_board))

    assert move_indexes_and_q_values == expected_move_indexes_and_q_values

    move_history = deque()
    x_strategy = create_training_player([q_table], move_history, 0)
    play_training_game([q_table], move_history, q_table_player, x_strategy,
                       o_strategy, 0.9, 1)

    init = INITIAL_Q_VALUES_FOR_X
    first_board = np.copy(new_board)

    val = 0.1 * (0.81 * 0.9) + 0.9 * (0.9 * (0.9 * 0.81))
    expected_move_indexes_and_q_values = {0: val,  1: init, 2: val,
                                          3: init, 4: init, 5: init,
                                          6: val,  7: init, 8: val}

    move_indexes_and_q_values = q_table.get_q_values(Board(first_board))

    assert move_indexes_and_q_values == expected_move_indexes_and_q_values

    second_board = np.copy(first_board)
    second_board[0] = CELL_X
    second_board[1] = CELL_O

    val = 0.9 * (0.9 * 0.81)
    expected_move_indexes_and_q_values = {2: val,
                                          3: init, 4: init, 5: init,
                                          6: init, 7: init, 8: init}

    move_indexes_and_q_values = q_table.get_q_values(Board(second_board))

    assert move_indexes_and_q_values == expected_move_indexes_and_q_values

    third_board = np.copy(second_board)
    third_board[2] = CELL_X
    third_board[5] = CELL_O

    val = 0.9 * 0.81
    expected_move_indexes_and_q_values = {3: val, 4: init,
                                          6: init, 7: init, 8: init}

    move_indexes_and_q_values = q_table.get_q_values(Board(third_board))

    assert move_indexes_and_q_values == expected_move_indexes_and_q_values

    fourth_board = np.copy(third_board)
    fourth_board[3] = CELL_X
    fourth_board[8] = CELL_O

    val = 0.81
    expected_move_indexes_and_q_values = {4: val,
                                          6: init, 7: init}

    move_indexes_and_q_values = q_table.get_q_values(Board(fourth_board))

    assert move_indexes_and_q_values == expected_move_indexes_and_q_values

    fifth_board = np.copy(fourth_board)
    fifth_board[4] = CELL_X
    fifth_board[7] = CELL_O

    val = 0.9
    expected_move_indexes_and_q_values = {6: val}

    move_indexes_and_q_values = q_table.get_q_values(Board(fifth_board))

    assert move_indexes_and_q_values == expected_move_indexes_and_q_values
def perform_training_playouts(node_cache=nodecache, board=Board(),
                              num_playouts=4000, display_progress=True):
    for game in range(num_playouts):
        perform_game_playout(node_cache, board)
        if display_progress and (game + 1) % (num_playouts // 10) == 0:
            print(f"{game+1}/{num_playouts} playouts...")
Example #24
def test_play_training_game_o_player():
    q_table = QTable()
    move_history = deque()
    q_table_player = CELL_O
    x_strategy = play_random_move
    o_strategy = create_training_player([q_table], move_history, 0)

    play_training_game([q_table], move_history, q_table_player, x_strategy,
                       o_strategy, 0.9, 1)

    init = INITIAL_Q_VALUES_FOR_O
    first_board = np.copy(new_board)
    first_board[6] = CELL_X

    val = 0.9 * 0.81
    expected_move_indexes_and_q_values = {0: val,  1: init, 2: init,
                                          3: init, 4: init, 5: init,
                                          7: init, 8: val}

    move_indexes_and_q_values = q_table.get_q_values(Board(first_board))

    assert move_indexes_and_q_values == expected_move_indexes_and_q_values

    second_board = np.copy(first_board)
    second_board[0] = CELL_O
    second_board[8] = CELL_X

    val = 0.9 * 0.9
    expected_move_indexes_and_q_values = {1: val,  2: init,
                                          3: init, 4: init, 5: init,
                                          7: init}

    move_indexes_and_q_values = q_table.get_q_values(Board(second_board))

    assert move_indexes_and_q_values == expected_move_indexes_and_q_values

    third_board = np.copy(second_board)
    third_board[1] = CELL_O
    third_board[5] = CELL_X

    val = 0.9 * 1.0
    expected_move_indexes_and_q_values = {2: val,
                                          3: init, 4: init,
                                          7: init}

    move_indexes_and_q_values = q_table.get_q_values(Board(third_board))

    assert move_indexes_and_q_values == expected_move_indexes_and_q_values

    move_history = deque()
    o_strategy = create_training_player([q_table], move_history, 0)
    play_training_game([q_table], move_history, q_table_player, x_strategy,
                       o_strategy, 0.9, 1)

    init = INITIAL_Q_VALUES_FOR_O
    first_board = np.copy(new_board)
    first_board[0] = CELL_X

    val = (1 - 0.9) * (0.9 * 0.81) + (0.9 * 0.0)
    expected_move_indexes_and_q_values = {1: init, 2: val,
                                          3: init, 4: init, 5: init,
                                          6: val,  7: init, 8: init}

    move_indexes_and_q_values = q_table.get_q_values(Board(first_board))

    assert move_indexes_and_q_values == expected_move_indexes_and_q_values

    second_board = np.copy(first_board)
    second_board[2] = CELL_O
    second_board[4] = CELL_X

    val = 0.9 * -1
    expected_move_indexes_and_q_values = {1: val,
                                          3: init, 5: init,
                                          6: init, 7: init, 8: init}

    move_indexes_and_q_values = q_table.get_q_values(Board(second_board))

    assert move_indexes_and_q_values == expected_move_indexes_and_q_values
Example #25
def test_play_mcts_move():
    b_2d = np.array([[1, 1, 0], [1, -1, 0], [-1, 1, -1]])
    b = b_2d.flatten()
    board = Board(b)
    nc = BoardCache()

    parent_node = find_or_create_node(nc, board)
    actual_stats = (parent_node.visits, parent_node.wins, parent_node.draws,
                    parent_node.losses)
    assert actual_stats == (0, 0, 0, 0)

    values = calculate_values(nc, board)
    expected_values = [(2, math.inf), (5, math.inf)]
    assert list(values) == expected_values

    perform_game_playout(nc, board)

    actual_stats = (parent_node.visits, parent_node.wins, parent_node.draws,
                    parent_node.losses)
    assert actual_stats == (1, 0, 0, 1)

    child_node_2 = find_or_create_node(nc, board.play_move(2))
    actual_stats = (child_node_2.visits, child_node_2.wins, child_node_2.draws,
                    child_node_2.losses)
    assert actual_stats == (1, 1, 0, 0)

    child_node_5 = find_or_create_node(nc, board.play_move(5))
    actual_stats = (child_node_5.visits, child_node_5.wins, child_node_5.draws,
                    child_node_5.losses)
    assert actual_stats == (0, 0, 0, 0)

    values = calculate_values(nc, board)
    expected_values = [(2, 1.0), (5, math.inf)]
    assert list(values) == expected_values

    perform_game_playout(nc, board)

    actual_stats = (parent_node.visits, parent_node.wins, parent_node.draws,
                    parent_node.losses)
    assert actual_stats == (2, 1, 0, 1)

    actual_stats = (child_node_2.visits, child_node_2.wins, child_node_2.draws,
                    child_node_2.losses)
    assert actual_stats == (1, 1, 0, 0)

    actual_stats = (child_node_5.visits, child_node_5.wins, child_node_5.draws,
                    child_node_5.losses)
    assert actual_stats == (1, 0, 0, 1)

    values = calculate_values(nc, board)
    expected_values = [(2, 2.177410022515475), (5, 1.1774100225154747)]
    assert list(values) == expected_values

    perform_training_playouts(nc, board, 100, False)

    actual_stats = (parent_node.visits, parent_node.wins, parent_node.draws,
                    parent_node.losses)
    assert actual_stats == (102, 6, 0, 96)

    actual_stats = (child_node_2.visits, child_node_2.wins, child_node_2.draws,
                    child_node_2.losses)
    assert actual_stats == (96, 96, 0, 0)

    actual_stats = (child_node_5.visits, child_node_5.wins, child_node_5.draws,
                    child_node_5.losses)
    assert actual_stats == (6, 0, 0, 6)

    values = calculate_values(nc, board)
    expected_values = [(2, 1.3104087632087014), (5, 1.2416350528348057)]
    assert list(values) == expected_values
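The expected values in this test match the UCT selection formula with exploration constant sqrt(2): value = wins/visits + sqrt(2 * ln(parent_visits) / visits). After the second playout, for instance:

import math

# Move 2: 1 win in 1 visit, with the parent visited twice.
assert math.isclose(1 / 1 + math.sqrt(2 * math.log(2) / 1),
                    2.177410022515475)
# Move 5: 0 wins in 1 visit.
assert math.isclose(0 / 1 + math.sqrt(2 * math.log(2) / 1),
                    1.1774100225154747)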
Example #26
def play():
    """
    Play an interactive two-player game on the console.
    """
    b = Board()
    print(b)

    finished_results = [
        'EQUITY noone won', 'winner is first player',
        'winner is second player'
    ]

    def take_turn(player):
        # Prompt the player, retrying once if the input is invalid.
        print(f'Player {player}')
        try:
            coords = get_coords()
        except ValueError as e:
            print(e)
            coords = get_coords()

        if coords is not False:
            try:
                b.put_move(player, coords)
            except IndexError as e:
                print(e)
                print(f'Player {player}')
                coords = get_coords()
                b.put_move(player, coords)
            print(b)

    game_over = False
    while not game_over:
        for player in (1, 2):
            if b.check_haveplace() == 0:
                game_over = True
                break
            take_turn(player)
            if b.check_the_Board() in finished_results:
                game_over = True
                break

    print(b.check_the_Board())
def test_get_valid_move_indexes():
    board = Board(np.array([0, -1, 0, 0, -1, 0, 1, 0, 1]))

    valid_indexes = board.get_valid_move_indexes()

    assert valid_indexes == [0, 2, 3, 5, 7]