def test_start_state_when_opponent_is_next_to_move(self):
    # The board holds four X marks and three O marks, and the MDP is
    # built with opponent index 1. The start state is expected to
    # already contain the opponent's O in the bottom-left cell.
    board = TicTacToe([
        ['X', 'O', 'O'],
        ['X', 'O', ' '],
        [' ', 'X', 'X'],
    ])
    opponent = AlphaBeta()
    mdp = FixedGameMDP(board, opponent, 1)

    after_opponent_reply = TicTacToe([
        ['X', 'O', 'O'],
        ['X', 'O', ' '],
        ['O', 'X', 'X'],
    ])
    self.assertEqual(mdp.start_state(), after_opponent_reply)
def test_cur_state_when_opponent_should_move_to_start(self):
    # Same position as the start-state test, but observed through an
    # Environment: its current state right after construction should
    # already include the opponent's O in the bottom-left cell.
    board = TicTacToe([
        ['X', 'O', 'O'],
        ['X', 'O', ' '],
        [' ', 'X', 'X'],
    ])
    opponent = AlphaBeta()
    environment = Environment(FixedGameMDP(board, opponent, 1))

    after_opponent_reply = TicTacToe([
        ['X', 'O', 'O'],
        ['X', 'O', ' '],
        ['O', 'X', 'X'],
    ])
    self.assertEqual(environment.cur_state(), after_opponent_reply)
def test_do_action(self):
    # Position before the agent acts:
    #   X - O
    #   - - X
    #   - - O
    start = TicTacToe().make_moves([1, 3, 6, 9])
    environment = Environment(FixedGameMDP(start.copy(), AlphaBeta(), 1))

    environment.do_action(7)

    # After the agent plays 7, the environment's state should also
    # contain the opponent's reply at 4.
    after_reply = TicTacToe().make_moves([1, 3, 6, 9, 7, 4])
    self.assertEqual(environment.cur_state(), after_reply)
def test_hash(self):
    # Games built from the same move sequence must act as the same
    # dictionary key; a game with an extra move must be a distinct key.
    first = TicTacToe().make_moves([1, 7])
    lookup = {first: 'game1'}
    self.assertEqual(len(lookup), 1)
    self.assertEqual(lookup[first], 'game1')

    # Writing through an equal game overwrites the existing entry.
    duplicate = TicTacToe().make_moves([1, 7])
    lookup[duplicate] = 'game2'
    self.assertEqual(lookup[first], 'game2')
    self.assertEqual(lookup[duplicate], 'game2')

    # A different position adds a second entry.
    longer = TicTacToe().make_moves([1, 7, 9])
    lookup[longer] = 'game3'
    self.assertEqual(len(lookup), 2)
    self.assertEqual(lookup[longer], 'game3')
def test_reward_when_game_is_over_and_is_draw(self):
    # State before the final move:
    #   X X O
    #   O O X
    #   X O -
    before = TicTacToe().make_moves([1, 3, 2, 4, 6, 5, 7, 8])
    move = 9

    # State after the final move (board is full):
    #   X X O
    #   O O X
    #   X O X
    after = before.copy().make_move(move)

    self.assertEqual(self.mdp.reward(before, move, after), 0.0)
def test_transitions(self):
    # Starting position:
    #   X - O
    #   - - X
    #   - - O
    game = TicTacToe().make_moves([1, 3, 6, 9])

    # AlphaBeta is the second player to move in this MDP.
    mdp = FixedGameMDP(game, AlphaBeta(), 1)

    # Placing X in slot 7 should yield exactly one transition, in
    # which O has answered in slot 4.
    outcomes = mdp.transitions(game, 7)
    self.assertEqual(len(outcomes), 1)

    successor, probability = outcomes[0]
    self.assertEqual(successor, game.copy().make_moves([7, 4]))
    self.assertEqual(probability, 1.0)
def test_reward_when_agent_moves_second_and_losses(self):
    # Opponent index 0: the opponent moves first, the agent second.
    opponent = AlphaBeta()
    opponent_index = 0
    mdp = FixedGameMDP(self.game, opponent, opponent_index)

    # State before the move:
    #   X X -
    #   O O -
    #   - - -
    before = TicTacToe().make_moves([1, 4, 2, 5])
    move = 3

    # State after the move (X completes the top row):
    #   X X X
    #   O O -
    #   - - -
    after = before.copy().make_move(move)

    self.assertEqual(mdp.reward(before, move, after), -1.0)
def test_reward_when_agent_moves_second_and_wins(self):
    # Opponent index 0: the opponent moves first, the agent second.
    opponent = AlphaBeta()
    opponent_index = 0
    mdp = FixedGameMDP(self.game, opponent, opponent_index)

    # State before the move:
    #   X X -
    #   O O -
    #   X - -
    before = TicTacToe().make_moves([1, 4, 2, 5, 7])
    move = 6

    # State after the move (O completes the middle row):
    #   X X -
    #   O O O
    #   X - -
    after = before.copy().make_move(move)

    self.assertEqual(mdp.reward(before, move, after), 1.0)
def test_legal_moves_are_empty_when_is_over(self):
    # X has completed the top row, so the game is over and no legal
    # moves should be offered even though empty cells remain.
    game = TicTacToe([
        ['X', 'X', 'X'],
        ['O', 'O', ' '],
        [' ', ' ', ' '],
    ])
    # assertEqual (rather than assertTrue on a comparison) reports the
    # actual number of moves when the check fails.
    self.assertEqual(len(game.legal_moves()), 0)
def setUp(self):
    # Give every test a fresh TicTacToe built with no arguments.
    fresh_game = TicTacToe()
    self.game = fresh_game
def test_equal(self):
    # Two games that received the same move sequence compare equal.
    sequence = [1, 2]
    self.game.make_moves(sequence)
    twin = TicTacToe().make_moves(sequence)
    self.assertEqual(self.game, twin)
def test_not_equal(self):
    # The fixture game has had no moves applied here; a game with one
    # move made must not compare equal to it.
    one_move_in = TicTacToe().make_moves([1])
    self.assertNotEqual(self.game, one_move_in)
def test_outcomes_draw(self):
    # Full board; outcomes() is expected to report 'D' for both players.
    full_board = TicTacToe([
        ['X', 'X', 'O'],
        ['O', 'O', 'X'],
        ['X', 'O', 'X'],
    ])
    self.assertEqual(full_board.outcomes(), ['D', 'D'])
def test_outcomes_win_second_player(self):
    # O fills the middle row; outcomes() is expected to report a loss
    # for the first player and a win for the second.
    o_wins = TicTacToe([
        ['X', 'X', ' '],
        ['O', 'O', 'O'],
        [' ', ' ', 'X'],
    ])
    self.assertEqual(o_wins.outcomes(), ['L', 'W'])
def setUp(self):
    # Shared fixtures: a fresh game, an MDP over that game with
    # AlphaBeta as the opponent at index 1, and two random players.
    self.game = TicTacToe()
    opponent = AlphaBeta()
    self.mdp = FixedGameMDP(self.game, opponent, 1)
    first_player = RandPlayer()
    second_player = RandPlayer()
    self.players = [first_player, second_player]
def test_cur_state(self):
    # For a fresh game with the opponent at index 1, the environment's
    # current state should equal both the MDP's start state and the
    # game itself.
    fresh_game = TicTacToe()
    mdp = FixedGameMDP(fresh_game, AlphaBeta(), 1)
    environment = Environment(mdp)

    self.assertEqual(environment.cur_state(), mdp.start_state())
    self.assertEqual(environment.cur_state(), fresh_game)
def test_with_board(self):
    # Build a game from an explicit board with three X marks and two
    # O marks, then check the open squares and whose turn it is.
    position = TicTacToe([
        ['X', ' ', 'X'],
        ['O', ' ', 'X'],
        [' ', 'O', ' '],
    ])
    self.assertEqual(position.legal_moves(), [2, 5, 7, 9])
    self.assertEqual(position.cur_player(), 1)