def create_random_tictactoe_training_example():
    """Play one game of uniformly random moves on a 4x4 board and label it.

    Moves are picked at random from ``get_possible_moves()`` until a move
    produces a victory or the board is full.

    Returns
    -------
    list
        ``[action_sequence, target_value, tictactoe]`` where
        ``action_sequence`` is the list of moves in the order played,
        ``target_value`` is ``'win'`` if the deciding move had an even
        round index (the side that moved first), ``'lost'`` if it had an
        odd round index, or ``'draw'`` if the last cell was filled without
        a victory, and ``tictactoe`` is the finished game object.
    """
    dimension = 4
    # Zero-based index of the move that fills the last free cell.
    last_round = dimension * dimension - 1

    tictactoe = TicTacToe(dimension)
    action_sequence = []
    round_count = 0
    while True:
        actions = tictactoe.get_possible_moves()
        action = actions[randint(0, len(actions) - 1)]
        action_sequence.append(action)
        # make_move only takes the position, so the same call serves both
        # sides; no branch on the round parity is needed here.
        tictactoe.make_move(action)

        if tictactoe.is_victory():
            target_value = 'win' if round_count % 2 == 0 else 'lost'
            break
        if round_count == last_round:
            target_value = 'draw'
            break
        round_count += 1
    return [action_sequence, target_value, tictactoe]
def testAgainstSecondMoveHeuristikAgentIn100Testgames(self):
    """Play 100 games on a 4x4 board: TD-Q agent first, heuristic agent second.

    A victory with 'X' as the last mover is counted for the TD-Q agent, a
    victory with 'O' as the last mover for the heuristic search agent.
    The test passes when the TD-Q agent has won at least 50 games.
    """
    tdq_wins = 0
    heuristic_wins = 0

    for _ in range(100):
        game = TicTacToe(4)
        # A fresh agent object is built for every game, as before.
        tdq_agent = TicTacToeTDQLearningAgent(
            TICTACTOE_4x4_TDQ_AGENT_1000_NAME, 4)

        while not game.is_terminal():
            chosen = tdq_agent.suggestAction(game)
            print(chosen)
            game.make_move(chosen)
            # The heuristic agent only answers if the game is still open.
            if not game.is_terminal():
                HeuristicSearchAgentTicTacToe.processAction(game)

        print(game.printable_game_matrix())

        if game.is_victory():
            last_mover = game.get_player_which_moved_last()
            if last_mover == 'X':
                tdq_wins += 1
            elif last_mover == 'O':
                heuristic_wins += 1

    print('Second move heuristic search agent wins: '
          + str(heuristic_wins)
          + ' games against TD-Q-Agent-1000 in 16 field Tic Tac Toe!')
    print('First move TD-Q-Agent-1000 wins: '
          + str(tdq_wins)
          + ' games against heuristic search agent in 16 field Tic Tac Toe!')
    self.assertTrue(tdq_wins >= 50)
def getRandomNonTerminalTicTacToeState():
    """Build a 4x4 game by attempting between 0 and 15 random moves.

    A move that produces a victory is undone immediately, so the returned
    game never shows a victory. Because of those undone moves the game may
    hold fewer tokens than the number of attempts.
    """
    game = TicTacToe(4)
    attempts = randint(0, 15)
    for _ in range(attempts):
        if game.is_victory():
            continue
        candidate = getRandomAction(game.get_possible_moves())
        game.make_move(candidate)
        if game.is_victory():
            # Take the winning move back to keep the state non-terminal.
            game.undo_move()
    return game
class TestDiagonalVictory(unittest.TestCase):
    """Tests for ``TicTacToe.is_diagonal_victory`` on a 4x4 board."""

    def setUp(self):
        """Prepare a game one move short of a main-diagonal line.

        The side moving first takes (0, 0), (1, 1), (2, 2); the other side
        takes (3, 0), (3, 1), (3, 2).
        """
        self.example_ticTacToe_game = TicTacToe(4)
        for move in [(0, 0), (3, 0), (1, 1), (3, 1), (2, 2), (3, 2)]:
            self.example_ticTacToe_game.make_move(move)

    def test_diagonal_victory_top_left_to_bottom_right(self):
        """Completing (0,0)..(3,3) is reported as a diagonal victory."""
        self.example_ticTacToe_game.make_move((3, 3))
        self.assertTrue(self.example_ticTacToe_game.is_diagonal_victory())

    def test_digital_victory_top_right_to_bottom_left(self):
        """Completing (0,3)..(3,0) is reported as a diagonal victory."""
        example_tictactoe = TicTacToe(4)
        for move in [(0, 3), (0, 0), (1, 2), (1, 0),
                     (2, 1), (2, 0), (3, 0)]:
            example_tictactoe.make_move(move)
        self.assertTrue(example_tictactoe.is_diagonal_victory())

    def test_no_digital_victory(self):
        """The prepared game has only three of four diagonal cells filled."""
        # Check the diagonal predicate, which is what this test class covers;
        # the vertical predicate is not the subject here.
        self.assertFalse(self.example_ticTacToe_game.is_diagonal_victory())
def test_digital_victory_top_right_to_bottom_left(self):
    """Filling (0,3), (1,2), (2,1), (3,0) with one side is a diagonal win."""
    moves_in_order = [
        (0, 3), (0, 0),
        (1, 2), (1, 0),
        (2, 1), (2, 0),
        (3, 0),  # closes the top-right to bottom-left diagonal
    ]
    example_tictactoe = TicTacToe(4)
    for position in moves_in_order:
        example_tictactoe.make_move(position)
    self.assertTrue(example_tictactoe.is_diagonal_victory())
def testAgainstFirstMoveRandomAgentIn100Testgames(self):
    """Play 100 games on a 3x3 board: random agent first, TD-Q agent second.

    A victory with 'X' as the last mover is counted for the random agent,
    a victory with 'O' as the last mover for the TD-Q agent. The test
    passes when the TD-Q agent has won at least 50 games.
    """
    random_wins = 0
    tdq_wins = 0

    for _ in range(100):
        game = TicTacToe(3)
        # A fresh agent object is built for every game, as before.
        tdq_agent = TicTacToeTDQLearningAgent(
            TICTACTOE_3x3_TDQ_AGENT_1000_NAME, 3)

        while not game.is_terminal():
            RandomAgent.processTicTacToeAction(game)
            # The TD-Q agent only answers if the game is still open.
            if not game.is_terminal():
                game.make_move(tdq_agent.suggestAction(game))

        print(game.printable_game_matrix())

        if game.is_victory():
            last_mover = game.get_player_which_moved_last()
            if last_mover == 'X':
                random_wins += 1
            elif last_mover == 'O':
                tdq_wins += 1

    print('First Move random agent wins: '
          + str(random_wins)
          + ' games against TD-Q-Agent-1000 in 9 field Tic Tac Toe!')
    print('Second Move TD-Q-Agent-1000 wins: '
          + str(tdq_wins)
          + ' games against random agent in 9 field Tic Tac Toe!')
    self.assertTrue(tdq_wins >= 50)
def learnTictactoe(self, gamesToPlay):
    """Train the TD-Q agent by letting it play against itself.

    Parameters
    ----------
    gamesToPlay : int
        Number of self-play training games to run.
    """
    # Fixed arguments handed to every qLearnIteration call.
    # NOTE(review): their meaning is defined by qLearnIteration - confirm there.
    third_arg = 0.4
    fourth_arg = 1

    for game_index in range(gamesToPlay):
        game = TicTacToe(self.__dimension)
        logging.info('Learning against itself game: ' + str(game_index))

        while not game.is_terminal():
            next_move = self.qLearnIteration(
                game, game.getReward(), third_arg, fourth_arg)
            game.make_move(next_move)

        # One more learning step on the finished game.
        if game.is_terminal():
            self.qLearnIteration(
                game, game.getReward(), third_arg, fourth_arg)

        # Clear the per-game memory before the next game starts.
        # presumably previous state / action / reward - confirm in qLearnIteration
        self.__s = None
        self.__a = None
        self.__r = None

        # Bumped on games 0, 100, 200, ...
        if game_index % 100 == 0:
            self.__random_factor += 1
class TestVerticalVictory(unittest.TestCase):
    """Tests for ``TicTacToe.is_vertical_victory`` on a 4x4 board."""

    def setUp(self):
        """Prepare a game with three moves each, one short of a full line.

        The side moving first holds (0, 0), (1, 0), (2, 0); the other side
        holds (3, 3), (2, 3), (1, 3).
        """
        opening = [(0, 0), (3, 3), (1, 0), (2, 3), (2, 0), (1, 3)]
        game = TicTacToe(4)
        for position in opening:
            game.make_move(position)
        self.example_ticTacToe_game = game

    def test_vertical_victory(self):
        """Adding (3, 0) completes the line and is reported as vertical."""
        game = self.example_ticTacToe_game
        game.make_move((3, 0))
        self.assertTrue(game.is_vertical_victory())

    def test_no_vertical_victory(self):
        """The prepared position itself is not a vertical victory."""
        game = self.example_ticTacToe_game
        self.assertFalse(game.is_vertical_victory())
def test_10_possible_moves(self):
    """After six moves on a 4x4 board the ten free cells are listed in order."""
    occupied = [(0, 0), (0, 1), (0, 2), (0, 3), (1, 0), (2, 0)]
    expected_free = [
        (1, 1), (1, 2), (1, 3),
        (2, 1), (2, 2), (2, 3),
        (3, 0), (3, 1), (3, 2), (3, 3),
    ]
    tictactoe = TicTacToe(4)
    for position in occupied:
        tictactoe.make_move(position)
    self.assertEqual(expected_free, tictactoe.get_possible_moves())
def test_undo_move_3(self):
    """Undoing two of five moves leaves the board of the first three moves."""
    played = [(0, 0), (1, 0), (0, 1), (2, 0), (0, 3)]
    remaining = [(0, 0), (1, 0), (0, 1)]

    tictactoe = TicTacToe(4)
    for position in played:
        tictactoe.make_move(position)
    for _ in range(2):
        tictactoe.undo_move()

    expected_tictactoe_game = TicTacToe(4)
    expected_tictactoe_game.initialize_game_matrix_with_action_sequence(
        remaining)

    cells_equal = expected_tictactoe_game.game_matrix == tictactoe.game_matrix
    self.assertTrue(cells_equal.all())
def result(state, action):
    """Return a new game equal to ``state`` with ``action`` played on it.

    The move is applied to a freshly constructed game that is first
    initialised from ``state``; the new game is returned.
    """
    # NOTE(review): the successor is always constructed with dimension 3 -
    # confirm initialize_game_with_another_game copes with larger boards.
    successor = TicTacToe(3)
    successor.initialize_game_with_another_game(state)
    successor.make_move(action)
    return successor
class TestVictory(unittest.TestCase):
    """Tests for ``TicTacToe.is_victory`` on a 4x4 board."""

    def setUp(self):
        """Prepare a twelve-move game with no victory yet.

        The next move at (1, 3), (3, 2) or (3, 3) completes a horizontal,
        vertical or diagonal line respectively for the side to move.
        """
        opening = [
            (0, 0), (0, 1), (0, 2), (0, 3),
            (1, 0), (2, 0), (1, 1), (2, 1),
            (1, 2), (3, 0), (2, 2), (3, 1),
        ]
        game = TicTacToe(4)
        for position in opening:
            game.make_move(position)
        self.example_ticTacToe_game = game

    def _play(self, position):
        """Play ``position`` on the prepared game and return the game."""
        self.example_ticTacToe_game.make_move(position)
        return self.example_ticTacToe_game

    def test_no_victory(self):
        """The prepared position is not a victory."""
        self.assertFalse(self.example_ticTacToe_game.is_victory())

    def test_only_horizontal_victory(self):
        """Completing (1, 0)..(1, 3) counts as a victory."""
        self.assertTrue(self._play((1, 3)).is_victory())

    def test_only_vertical_victory(self):
        """Completing (0, 2)..(3, 2) counts as a victory."""
        self.assertTrue(self._play((3, 2)).is_victory())

    def test_only_diagonal_victory(self):
        """Completing (0, 0)..(3, 3) counts as a victory."""
        self.assertTrue(self._play((3, 3)).is_victory())
class TestMakeMove(unittest.TestCase):
    """Tests for ``TicTacToe.make_move`` on a 4x4 board."""

    # The seven moves that both fixtures and the first test play, in order.
    _SEVEN_MOVES = [(0, 0), (3, 3), (0, 1), (2, 2), (0, 2), (1, 1), (0, 3)]

    def setUp(self):
        """Create an empty game and a reference game with seven moves played."""
        self.ticTacToe = TicTacToe(4)
        self.example_ticTacToe_game = TicTacToe(4)
        for move in self._SEVEN_MOVES:
            self.example_ticTacToe_game.make_move(move)

    def test_put_seven_game_tokens(self):
        """Replaying the same seven moves yields the same game matrix."""
        for move in self._SEVEN_MOVES:
            self.ticTacToe.make_move(move)
        equality_matrix = (
            self.example_ticTacToe_game.game_matrix == self.ticTacToe.game_matrix)
        self.assertTrue(equality_matrix.all())

    def test_if_value_not_changes_when_position_is_taken(self):
        """Moves onto already occupied cells leave the game matrix unchanged."""
        # Snapshot the matrix first. Binding the game to a second name would
        # only alias it, and the comparison below would then compare the
        # matrix with itself and could never fail.
        matrix_before = self.example_ticTacToe_game.game_matrix.copy()

        # All four target cells were filled in setUp.
        for taken in [(0, 0), (3, 3), (0, 1), (2, 2)]:
            self.example_ticTacToe_game.make_move(taken)

        self.assertTrue(
            (matrix_before == self.example_ticTacToe_game.game_matrix).all())
def getRandomTerminalTicTacToeState():
    """Play random moves on a fresh 4x4 game until it is terminal.

    Terminality is decided by ``isTicTacToeStateTerminal``; each move is
    picked by ``getRandomAction`` from the currently possible moves.
    """
    game = TicTacToe(4)
    while True:
        if isTicTacToeStateTerminal(game):
            return game
        free_cells = game.get_possible_moves()
        game.make_move(getRandomAction(free_cells))
elif state.count_of_game_tokens_in_game() % 2 == 1: return 'O' def terminal_test(state): if state.is_victory(): return True elif not state.is_victory() and state.count_of_game_tokens_in_game( ) == state.get_maximal_amount_of_game_tokens(): return True else: return False def utility(state): if player(state) == 'X' and state.is_victory(): return -1 elif player(state) != 'X' and state.is_victory(): return 1 elif state.count_of_game_tokens_in_game( ) == state.get_maximal_amount_of_game_tokens() and not state.is_victory(): return 0 ttt = TicTacToe(4) ttt.make_move((1, 1)) ttt.make_move((2, 1)) ttt.make_move((2, 2)) ttt.make_move((1, 2)) print ttt.printable_game_matrix() print iterative_deepening(ttt, 1)
def utility(state):
    """Score ``state`` for the search.

    Returns -1 for a victory when ``player(state)`` is 'X', 1 for a victory
    otherwise, 0 when the board is full without a victory, and ``None``
    when none of these apply.
    """
    if state.is_victory():
        return -1 if player(state) == 'X' else 1
    board_is_full = (state.count_of_game_tokens_in_game()
                     == state.get_maximal_amount_of_game_tokens())
    if board_is_full:
        return 0
    return None


# Demo run: set up a six-move position on a 4x4 board, search it and
# report the result, the elapsed time and the search counters.
state = TicTacToe(4)
for move in [(1, 1), (2, 0), (1, 0), (2, 1), (1, 2), (2, 2)]:
    state.make_move(move)
print(state.printable_game_matrix())

time_before_funciton_call = time.time()
print(alpha_beta_search(state))
elapsed_ms = int((time.time() - time_before_funciton_call) * 1000)
print('Time in milliseconds: ' + str(elapsed_ms))
print(count)
print(cutoffs)