def test_get_q_values_initial_o_turn(): b = np.array([[1, 0, -1], [1, 0, -1], [1, 0, 0]]).flatten() q_table = QTable() q_values = q_table.get_q_values(Board(b)) expected_q_values = {1: INITIAL_Q_VALUES_FOR_O, 4: INITIAL_Q_VALUES_FOR_O, 7: INITIAL_Q_VALUES_FOR_O, 8: INITIAL_Q_VALUES_FOR_O} assert q_values == expected_q_values
def test_update_q_value(): qtable = QTable() b_2d = np.array([[1.0, 0.0, 0.0], [1.0, -1.0, 0.0], [0.0, 1.0, -1.0]]) b = b_2d.flatten() board = Board(b) qvalues = qtable.get_q_values(board) init = INITIAL_Q_VALUES_FOR_O expected_qvalues = {1: init, 2: init, 5: init, 6: init} assert qvalues == expected_qvalues b_rot90_flipud_2d = np.flipud(np.rot90(b_2d)) b_rot90_flipud = b_rot90_flipud_2d.flatten() board_rot90_flipud = Board(b_rot90_flipud) qtable.update_q_value(board_rot90_flipud, 2, 0.8) qtable.update_q_value(board_rot90_flipud, 7, 0.7) assert len(qtable.qtable.cache) == 2 expected_qvalues = {1: init, 2: init, 5: 0.7, 6: 0.8} qvalues = qtable.get_q_values(board) assert qvalues == expected_qvalues expected_qvalues = {2: 0.8, 3: init, 6: init, 7: 0.7} qvalues = qtable.get_q_values(board_rot90_flipud) assert qvalues == expected_qvalues
def test_play_training_game_o_player(): q_table = QTable() move_history = deque() q_table_player = CELL_O x_strategy = play_random_move o_strategy = create_training_player([q_table], move_history, 0) play_training_game([q_table], move_history, q_table_player, x_strategy, o_strategy, 0.9, 1) init = INITIAL_Q_VALUES_FOR_O first_board = np.copy(new_board) first_board[6] = CELL_X val = 0.9 * 0.81 expected_move_indexes_and_q_values = {0: val, 1: init, 2: init, 3: init, 4: init, 5: init, 7: init, 8: val} move_indexes_and_q_values = q_table.get_q_values(Board(first_board)) assert move_indexes_and_q_values == expected_move_indexes_and_q_values second_board = np.copy(first_board) second_board[0] = CELL_O second_board[8] = CELL_X val = 0.9 * 0.9 expected_move_indexes_and_q_values = {1: val, 2: init, 3: init, 4: init, 5: init, 7: init} move_indexes_and_q_values = q_table.get_q_values(Board(second_board)) assert move_indexes_and_q_values == expected_move_indexes_and_q_values third_board = np.copy(second_board) third_board[1] = CELL_O third_board[5] = CELL_X val = 0.9 * 1.0 expected_move_indexes_and_q_values = {2: val, 3: init, 4: init, 7: init} move_indexes_and_q_values = q_table.get_q_values(Board(third_board)) assert move_indexes_and_q_values == expected_move_indexes_and_q_values move_history = deque() o_strategy = create_training_player([q_table], move_history, 0) play_training_game([q_table], move_history, q_table_player, x_strategy, o_strategy, 0.9, 1) init = INITIAL_Q_VALUES_FOR_O first_board = np.copy(new_board) first_board[0] = CELL_X val = (1 - 0.9) * (0.9 * 0.81) + (0.9 * 0.0) expected_move_indexes_and_q_values = {1: init, 2: val, 3: init, 4: init, 5: init, 6: val, 7: init, 8: init} move_indexes_and_q_values = q_table.get_q_values(Board(first_board)) assert move_indexes_and_q_values == expected_move_indexes_and_q_values second_board = np.copy(first_board) second_board[2] = CELL_O second_board[4] = CELL_X val = 0.9 * -1 expected_move_indexes_and_q_values = {1: val, 3: init, 5: init, 6: init, 7: init, 8: init} move_indexes_and_q_values = q_table.get_q_values(Board(second_board)) assert move_indexes_and_q_values == expected_move_indexes_and_q_values
def test_play_training_game_x_player(): q_table = QTable() move_history = deque() q_table_player = CELL_X x_strategy = create_training_player([q_table], move_history, 0) o_strategy = play_random_move play_training_game([q_table], move_history, q_table_player, x_strategy, o_strategy, 0.9, 1) init = INITIAL_Q_VALUES_FOR_X first_board = np.copy(new_board) val = 0.9 * 0.81 expected_move_indexes_and_q_values = {0: val, 1: init, 2: val, 3: init, 4: init, 5: init, 6: val, 7: init, 8: val} move_indexes_and_q_values = q_table.get_q_values(Board(first_board)) assert move_indexes_and_q_values == expected_move_indexes_and_q_values second_board = np.copy(first_board) second_board[0] = CELL_X second_board[7] = CELL_O val = 0.9 * 0.9 expected_move_indexes_and_q_values = {1: val, 2: init, 3: init, 4: init, 5: init, 6: init, 8: init} move_indexes_and_q_values = q_table.get_q_values(Board(second_board)) assert move_indexes_and_q_values == expected_move_indexes_and_q_values third_board = np.copy(second_board) third_board[1] = CELL_X third_board[5] = CELL_O val = 0.9 * 1.0 expected_move_indexes_and_q_values = {2: val, 3: init, 4: init, 6: init, 8: init} move_indexes_and_q_values = q_table.get_q_values(Board(third_board)) assert move_indexes_and_q_values == expected_move_indexes_and_q_values move_history = deque() x_strategy = create_training_player([q_table], move_history, 0) play_training_game([q_table], move_history, q_table_player, x_strategy, o_strategy, 0.9, 1) init = INITIAL_Q_VALUES_FOR_X first_board = np.copy(new_board) val = 0.1 * (0.81 * 0.9) + 0.9 * (0.9 * (0.9 * 0.81)) expected_move_indexes_and_q_values = {0: val, 1: init, 2: val, 3: init, 4: init, 5: init, 6: val, 7: init, 8: val} move_indexes_and_q_values = q_table.get_q_values(Board(first_board)) assert move_indexes_and_q_values == expected_move_indexes_and_q_values second_board = np.copy(first_board) second_board[0] = CELL_X second_board[1] = CELL_O val = 0.9 * (0.9 * 0.81) expected_move_indexes_and_q_values = {2: val, 3: init, 4: init, 5: init, 6: init, 7: init, 8: init} move_indexes_and_q_values = q_table.get_q_values(Board(second_board)) assert move_indexes_and_q_values == expected_move_indexes_and_q_values third_board = np.copy(second_board) third_board[2] = CELL_X third_board[5] = CELL_O val = 0.9 * 0.81 expected_move_indexes_and_q_values = {3: val, 4: init, 6: init, 7: init, 8: init} move_indexes_and_q_values = q_table.get_q_values(Board(third_board)) assert move_indexes_and_q_values == expected_move_indexes_and_q_values fourth_board = np.copy(third_board) fourth_board[3] = CELL_X fourth_board[8] = CELL_O val = 0.81 expected_move_indexes_and_q_values = {4: val, 6: init, 7: init} move_indexes_and_q_values = q_table.get_q_values(Board(fourth_board)) assert move_indexes_and_q_values == expected_move_indexes_and_q_values fifth_board = np.copy(fourth_board) fifth_board[4] = CELL_X fifth_board[7] = CELL_O val = 0.9 expected_move_indexes_and_q_values = {6: val} move_indexes_and_q_values = q_table.get_q_values(Board(fifth_board)) assert move_indexes_and_q_values == expected_move_indexes_and_q_values