示例#1
0
def test_choose_move_index_2nd_move():
    """Expect choose_move_index to pick move 2 over move 1.

    Both moves are recorded for the same position, with Q-values 0.5
    (move 1) and 1 (move 2); the move carrying the larger value is the
    one the test expects back.
    """
    cells = np.array([[1, 0, 0], [1, -1, 1], [-1, 1, -1]]).flatten()
    position = Board(cells)

    table = QTable()
    for move, value in ((1, 0.5), (2, 1)):
        table.update_q_value(position, move, value)

    # NOTE(review): the trailing 0 is presumably the exploration rate
    # (fully greedy selection) -- confirm against choose_move_index.
    assert choose_move_index([table], position, 0) == 2
示例#2
0
def test_choose_move_index_with_transformation():
    """Expect Q-values stored for a board to apply to its 180-degree rotation.

    Q-values are written for moves 1 and 2 of the original position, then
    a move is requested for the same position rotated by 180 degrees.
    Flat index 2 (row 0, column 2) lands on flat index 6 (row 2, column 0)
    under that rotation, so 6 is the expected answer.
    """
    grid = np.array([[1, 0, 0], [1, -1, 1], [-1, 1, -1]])
    original = Board(grid.flatten())

    table = QTable()
    table.update_q_value(original, 1, -1)
    table.update_q_value(original, 2, 1)

    # Two quarter turns give the 180-degree rotation of the grid.
    rotated = Board(np.rot90(grid, k=2).flatten())

    chosen = choose_move_index([table], rotated, 0)
    assert chosen == 6
示例#3
0
def test_update_q_value():
    """Expect Q-value updates made through a transformed board to be shared.

    Steps:
    1. A fresh table reports INITIAL_Q_VALUES_FOR_O for every empty cell
       (flat indices 1, 2, 5 and 6) of the starting position.
    2. Moves 2 and 7 are updated through a rotated-then-flipped copy of
       the position.
    3. The updates must be visible from the original position at indices
       6 and 5, and from the transformed position at indices 2 and 7.
    """
    table = QTable()

    grid = np.array([[1.0,  0.0,  0.0],
                     [1.0, -1.0,  0.0],
                     [0.0,  1.0, -1.0]])
    position = Board(grid.flatten())

    default_q = INITIAL_Q_VALUES_FOR_O

    # Untouched table: every open cell carries the default value.
    untouched = dict.fromkeys((1, 2, 5, 6), default_q)
    observed = table.get_q_values(position)
    assert observed == untouched

    # rot90 followed by flipud amounts to transposing the grid, so flat
    # index 2 here corresponds to 6 in the original, and 7 to 5.
    mirrored_grid = np.flipud(np.rot90(grid))
    mirrored = Board(mirrored_grid.flatten())

    table.update_q_value(mirrored, 2, 0.8)
    table.update_q_value(mirrored, 7, 0.7)

    assert len(table.qtable.cache) == 2

    # Seen from the original orientation.
    observed = table.get_q_values(position)
    assert observed == {**untouched, 5: 0.7, 6: 0.8}

    # Seen from the transformed orientation.
    observed = table.get_q_values(mirrored)
    assert observed == {2: 0.8, 3: default_q, 6: default_q, 7: 0.7}
示例#4
0
def test_get_move_average_q_value_pairs():
    """Expect per-move Q-values to be averaged across two tables.

    Move 1 holds 0.0 and -0.5 in the two tables (mean -0.25); move 2
    holds 1.0 and 0.5 (mean 0.75). The result is expected as a list of
    (move, average) tuples.
    """
    table_a, table_b = QTable(), QTable()

    grid = np.array([[1, 0, 0], [1, -1, 1], [-1, 1, -1]])
    position = Board(grid.flatten())

    # (table, move index, Q-value) applied in this order.
    updates = (
        (table_a, 1, 0.0),
        (table_a, 2, 1.0),
        (table_b, 1, -0.5),
        (table_b, 2, 0.5),
    )
    for table, move, value in updates:
        table.update_q_value(position, move, value)

    averaged = get_move_average_q_value_pairs([table_a, table_b], position)

    assert averaged == [(1, -0.25), (2, 0.75)]