示例#1
0
def test_get_q_values_initial_o_turn():
    """A fresh QTable yields the initial O value for each open cell.

    The board holds 0 at flat indexes 1, 4, 7 and 8, and those are the
    only move indexes expected in the returned mapping.
    """
    cells = np.array([
        [1, 0, -1],
        [1, 0, -1],
        [1, 0,  0],
    ]).flatten()
    open_moves = (1, 4, 7, 8)

    actual = QTable().get_q_values(Board(cells))

    # Nothing has been learned yet, so every open move maps to the same
    # initial constant.
    assert actual == dict.fromkeys(open_moves, INITIAL_Q_VALUES_FOR_O)
示例#2
0
def test_get_move_average_q_value_pairs():
    """Per-move Q-values are averaged over the supplied Q-tables.

    Two tables are given different values for moves 1 and 2 on the same
    board; the expected result pairs each move with the mean of the two.
    """
    first_table, second_table = QTable(), QTable()

    board = Board(np.array([[1, 0, 0],
                            [1, -1, 1],
                            [-1, 1, -1]]).flatten())

    # (table, values for moves 1 and 2), written in the order listed.
    seeded = (
        (first_table, (0.0, 1.0)),
        (second_table, (-0.5, 0.5)),
    )
    for table, values in seeded:
        for move, value in zip((1, 2), values):
            table.update_q_value(board, move, value)

    averaged = get_move_average_q_value_pairs(
        [first_table, second_table], board)

    assert averaged == [(1, -0.25), (2, 0.75)]
示例#3
0
def test_choose_move_index_2nd_move():
    """With moves 1 and 2 valued 0.5 and 1, move 2 is expected back.

    NOTE(review): the trailing 0 passed to choose_move_index is
    presumably an exploration rate (epsilon) - confirm against its
    definition.
    """
    board = Board(np.array([[1, 0, 0],
                            [1, -1, 1],
                            [-1, 1, -1]]).flatten())

    table = QTable()
    for move, value in ((1, 0.5), (2, 1)):
        table.update_q_value(board, move, value)

    chosen = choose_move_index([table], board, 0)

    assert chosen == 2
示例#4
0
def test_choose_move_index_with_transformation():
    """Q-values stored for a board are used for a rotated copy of it.

    Values are written for moves 1 and 2 on the original board. The
    lookup is then made on the same grid rotated by 180 degrees, which
    maps flat index i to 8 - i, so the expected move is 6 (the image of
    move 2).
    """
    grid = np.array([[1, 0, 0],
                     [1, -1, 1],
                     [-1, 1, -1]])
    board = Board(grid.flatten())

    table = QTable()
    table.update_q_value(board, 1, -1)
    table.update_q_value(board, 2, 1)

    # Two quarter turns give the 180-degree rotation of the grid.
    rotated_board = Board(np.rot90(grid, k=2).flatten())

    chosen = choose_move_index([table], rotated_board, 0)

    assert chosen == 6
示例#5
0
def test_update_q_value():
    """Updates made through a transformed board show up on the original.

    Steps:
      1. A fresh table reports the initial O value for every open cell.
      2. Two values are written via a rot90 + flipud version of the grid.
      3. The table's cache is expected to hold exactly two entries.
      4. Both the original and the transformed board report the written
         values at their own corresponding move indexes.
    """
    table = QTable()
    init = INITIAL_Q_VALUES_FOR_O

    grid = np.array([[1.0,  0.0,  0.0],
                     [1.0, -1.0,  0.0],
                     [0.0,  1.0, -1.0]])
    board = Board(grid.flatten())

    assert table.get_q_values(board) == dict.fromkeys((1, 2, 5, 6), init)

    transformed_grid = np.flipud(np.rot90(grid))
    transformed_board = Board(transformed_grid.flatten())

    for move, value in ((2, 0.8), (7, 0.7)):
        table.update_q_value(transformed_board, move, value)

    assert len(table.qtable.cache) == 2

    # Seen from the original orientation, the updates land on moves 6 and 5.
    assert table.get_q_values(board) == {1: init, 2: init, 5: 0.7, 6: 0.8}

    # Seen from the transformed orientation, they stay on moves 2 and 7.
    from_transformed = table.get_q_values(transformed_board)
    assert from_transformed == {2: 0.8, 3: init, 6: init, 7: 0.7}
示例#6
0
def test_play_training_game_o_player():
    """Check the Q-values stored for O after each of two training games.

    The Q-table drives the O player while X uses play_random_move. After
    every game the boards O is expected to have faced are rebuilt by hand
    and the Q-values stored for them are compared with exact values.

    NOTE(review): exact X moves are asserted although X plays randomly;
    presumably the randomness is patched or seeded outside this function
    - confirm.
    NOTE(review): the trailing 0.9 and 1 passed to play_training_game
    are presumably learning rate and discount factor - confirm against
    its definition.
    """
    q_table = QTable()
    init = INITIAL_Q_VALUES_FOR_O

    def play_one_game():
        # Each game gets a fresh move history shared by the training
        # player and the game loop.
        history = deque()
        o_player = create_training_player([q_table], history, 0)
        play_training_game([q_table], history, CELL_O, play_random_move,
                           o_player, 0.9, 1)

    def with_marks(cells, *marks):
        # Copy of `cells` with every (index, mark) pair written into it.
        updated = np.copy(cells)
        for index, mark in marks:
            updated[index] = mark
        return updated

    def expect_q_values(cells, learned, untouched):
        # `untouched` moves must still hold the initial value.
        expected = {**dict.fromkeys(untouched, init), **learned}
        assert q_table.get_q_values(Board(cells)) == expected

    # ---- first game ----
    play_one_game()

    opening = with_marks(new_board, (6, CELL_X))
    expect_q_values(opening,
                    dict.fromkeys((0, 8), 0.9 * 0.81),
                    (1, 2, 3, 4, 5, 7))

    middle = with_marks(opening, (0, CELL_O), (8, CELL_X))
    expect_q_values(middle, {1: 0.9 * 0.9}, (2, 3, 4, 5, 7))

    late = with_marks(middle, (1, CELL_O), (5, CELL_X))
    expect_q_values(late, {2: 0.9 * 1.0}, (3, 4, 7))

    # ---- second game ----
    play_one_game()

    opening = with_marks(new_board, (0, CELL_X))
    blended = (1 - 0.9) * (0.9 * 0.81) + (0.9 * 0.0)
    expect_q_values(opening,
                    dict.fromkeys((2, 6), blended),
                    (1, 3, 4, 5, 7, 8))

    middle = with_marks(opening, (2, CELL_O), (4, CELL_X))
    expect_q_values(middle, {1: 0.9 * -1}, (3, 5, 6, 7, 8))
示例#7
0
def test_play_training_game_x_player():
    """Check the Q-values stored for X after each of two training games.

    The Q-table drives the X player while O uses play_random_move. After
    every game the boards X is expected to have faced are rebuilt by hand
    and the Q-values stored for them are compared with exact values.

    NOTE(review): exact O moves are asserted although O plays randomly;
    presumably the randomness is patched or seeded outside this function
    - confirm.
    NOTE(review): the trailing 0.9 and 1 passed to play_training_game
    are presumably learning rate and discount factor - confirm against
    its definition.
    """
    q_table = QTable()
    init = INITIAL_Q_VALUES_FOR_X
    corners = (0, 2, 6, 8)

    def play_one_game():
        # Each game gets a fresh move history shared by the training
        # player and the game loop.
        history = deque()
        x_player = create_training_player([q_table], history, 0)
        play_training_game([q_table], history, CELL_X, x_player,
                           play_random_move, 0.9, 1)

    def with_marks(cells, *marks):
        # Copy of `cells` with every (index, mark) pair written into it.
        updated = np.copy(cells)
        for index, mark in marks:
            updated[index] = mark
        return updated

    def expect_q_values(cells, learned, untouched):
        # `untouched` moves must still hold the initial value.
        expected = {**dict.fromkeys(untouched, init), **learned}
        assert q_table.get_q_values(Board(cells)) == expected

    # ---- first game ----
    play_one_game()

    empty = with_marks(new_board)
    expect_q_values(empty,
                    dict.fromkeys(corners, 0.9 * 0.81),
                    (1, 3, 4, 5, 7))

    second = with_marks(empty, (0, CELL_X), (7, CELL_O))
    expect_q_values(second, {1: 0.9 * 0.9}, (2, 3, 4, 5, 6, 8))

    third = with_marks(second, (1, CELL_X), (5, CELL_O))
    expect_q_values(third, {2: 0.9 * 1.0}, (3, 4, 6, 8))

    # ---- second game ----
    play_one_game()

    empty = with_marks(new_board)
    blended = 0.1 * (0.81 * 0.9) + 0.9 * (0.9 * (0.9 * 0.81))
    expect_q_values(empty,
                    dict.fromkeys(corners, blended),
                    (1, 3, 4, 5, 7))

    second = with_marks(empty, (0, CELL_X), (1, CELL_O))
    expect_q_values(second, {2: 0.9 * (0.9 * 0.81)}, (3, 4, 5, 6, 7, 8))

    third = with_marks(second, (2, CELL_X), (5, CELL_O))
    expect_q_values(third, {3: 0.9 * 0.81}, (4, 6, 7, 8))

    fourth = with_marks(third, (3, CELL_X), (8, CELL_O))
    expect_q_values(fourth, {4: 0.81}, (6, 7))

    fifth = with_marks(fourth, (4, CELL_X), (7, CELL_O))
    expect_q_values(fifth, {6: 0.9}, ())