def test_start_state_when_opponent_is_next_to_move(self):
    # The initial board holds four X marks and three O marks.  The MDP is
    # built with AlphaBeta as the fixed opponent at index 1, and its start
    # state is expected to already contain an extra O in the bottom-left.
    initial_board = [
        ['X', 'O', 'O'],
        ['X', 'O', ' '],
        [' ', 'X', 'X'],
    ]
    board_after_reply = [
        ['X', 'O', 'O'],
        ['X', 'O', ' '],
        ['O', 'X', 'X'],
    ]

    mdp = FixedGameMDP(TicTacToe(initial_board), AlphaBeta(), 1)
    expected = TicTacToe(board_after_reply)

    self.assertEqual(mdp.start_state(), expected)
def test_cur_state_when_opponent_should_move_to_start(self):
    # Same position as the start-state test, but observed through an
    # Environment: its current state is expected to already include the
    # extra O in the bottom-left corner.
    initial_board = [
        ['X', 'O', 'O'],
        ['X', 'O', ' '],
        [' ', 'X', 'X'],
    ]
    board_after_reply = [
        ['X', 'O', 'O'],
        ['X', 'O', ' '],
        ['O', 'X', 'X'],
    ]

    mdp = FixedGameMDP(TicTacToe(initial_board), AlphaBeta(), 1)
    environment = Environment(mdp)
    expected = TicTacToe(board_after_reply)

    self.assertEqual(environment.cur_state(), expected)
def test_do_action(self):
    # Position before the action:
    #   X - O
    #   - - X
    #   - - O
    opening_moves = [1, 3, 6, 9]
    board = TicTacToe().make_moves(opening_moves)

    # The MDP receives a copy of the board.
    environment = Environment(FixedGameMDP(board.copy(), AlphaBeta(), 1))
    environment.do_action(7)

    # After the action the state should contain move 7 followed by move 4
    # (presumably the AlphaBeta opponent's reply -- confirm against the
    # FixedGameMDP implementation).
    expected = TicTacToe().make_moves(opening_moves + [7, 4])
    self.assertEqual(environment.cur_state(), expected)
def test_transitions(self):
    # Position before the action:
    #   X - O
    #   - - X
    #   - - O
    board = TicTacToe().make_moves([1, 3, 6, 9])

    # AlphaBeta is the second player to move in this MDP.
    mdp = FixedGameMDP(board, AlphaBeta(), 1)

    # Placing X in slot 7 should yield exactly one transition, in which
    # O is assumed to answer in slot 4.
    outcomes = mdp.transitions(board, 7)
    self.assertEqual(len(outcomes), 1)

    resulting_board, probability = outcomes[0]
    expected_board = board.copy().make_moves([7, 4])
    self.assertEqual(resulting_board, expected_board)
    self.assertEqual(probability, 1.0)
def test_reward_when_agent_moves_second_and_losses(self):
    # The fixed opponent sits at index 0 here.
    opponent = AlphaBeta()
    mdp = FixedGameMDP(self.game, opponent, 0)

    # State before the move:
    #   X X -
    #   O O -
    #   - - -
    before = TicTacToe().make_moves([1, 4, 2, 5])

    # State after slot 3 is taken (X completes the top row):
    #   X X X
    #   O O -
    #   - - -
    move = 3
    after = before.copy().make_move(move)

    # The transition into that state is expected to be worth -1.0.
    self.assertEqual(mdp.reward(before, move, after), -1.0)
def test_reward_when_agent_moves_second_and_wins(self):
    # Opponent (index 0) moves first, the agent second.
    opponent = AlphaBeta()
    mdp = FixedGameMDP(self.game, opponent, 0)

    # State before the move:
    #   X X -
    #   O O -
    #   X - -
    before = TicTacToe().make_moves([1, 4, 2, 5, 7])

    # State after slot 6 is taken (O completes the middle row):
    #   X X -
    #   O O O
    #   X - -
    move = 6
    after = before.copy().make_move(move)

    # The transition into that state is expected to be worth 1.0.
    self.assertEqual(mdp.reward(before, move, after), 1.0)
def setUp(self):
    # Give every test in this case a fresh AlphaBeta instance as self.ab.
    fresh_player = AlphaBeta()
    self.ab = fresh_player
def test_cur_state(self):
    # With a default-constructed game, the environment's current state
    # should match both the MDP's start state and the game itself.
    fresh_game = TicTacToe()
    mdp = FixedGameMDP(fresh_game, AlphaBeta(), 1)
    environment = Environment(mdp)

    observed = environment.cur_state()
    start = mdp.start_state()
    self.assertEqual(observed, start)

    self.assertEqual(environment.cur_state(), fresh_game)
def setUp(self):
    # Shared fixtures: a default game, an MDP over that game with
    # AlphaBeta at index 1, and a pair of random players.
    game = TicTacToe()
    self.game = game
    self.mdp = FixedGameMDP(game, AlphaBeta(), 1)
    self.players = [RandPlayer() for _ in range(2)]