def __run__(self, player1, player2):
    """
    Plays one complete game on a fresh board.

    The two players take turns; a player that has no valid move is skipped.
    The game ends as soon as the board reports a winner after a move.

    :param player1: The player who gets the first turn
    :param player2: The player who gets the second turn
    :return: The value of config.get_label_from_winner_color(player1, player2, winner),
             presumably the original color of the winning player
    """
    self.board = OthelloBoard()
    if self.gui:
        self.gui.show_game(self.board)

    order = (player1, player2)
    turn = 0  # index into order of the player whose turn it is
    while True:
        mover, opponent = order[turn], order[1 - turn]
        can_move = len(self.board.get_valid_moves(mover.color)) > 0

        if can_move:
            # Players only ever see a copy, so they cannot alter the real board
            move = mover.get_move(self.board.copy())
            self.board.apply_move(move, mover.color)

            if self.gui:
                self.gui.flash_move(move, mover.color)
                self.gui.update(self.board, opponent)

            winner = self.board.game_won()
            if winner is not None:
                if self.gui:
                    self.gui.show_winner(winner, self.board)
                return config.get_label_from_winner_color(
                    player1, player2, winner)

        # Hand the turn over, whether or not a move was made
        turn = 1 - turn
Пример #2
0
    def test_Board_CountStones(self):
        """count_stones reports (2, 2) on a new board and (4, 4) after four alternating moves."""
        board = OthelloBoard()
        self.assertEqual((2, 2), board.count_stones())

        # Two moves per color, played alternately starting with Black
        opening = (
            ((2, 3), config.BLACK),
            ((2, 2), config.WHITE),
            ((2, 1), config.BLACK),
            ((1, 1), config.WHITE),
        )
        for position, color in opening:
            board.apply_move(position, color)

        self.assertEqual((4, 4), board.count_stones())
Пример #3
0
    def generate_supervised_training_data(cls, games, labeling_strategy):
        """
        Builds a supervised training set from randomly played games.

        Before every random move the current board is recorded together with the move
        that :param labeling_strategy would play on it. The labeling strategy always
        plays as BLACK, while the random mover alternates colors.

        :param games: The number of games to be simulated
        :param labeling_strategy: The strategy used to label each sample. The label equals labeling_strategy.get_move(board)
        :return: a list of tuples(board_sample, move_label)
        """

        labeling_strategy.color = cls.config.BLACK

        random_mover = RandomPlayer()
        colors = OthelloBaseExperiment.AlternatingColorIterator()

        started_at = datetime.now()
        samples = []
        for _ in range(games):
            board = OthelloBoard()
            while board.game_won() is None:
                # Record the expert's answer for the current position
                label = labeling_strategy.get_move(board)
                samples.append((board.copy(), label))

                # Advance the game with a random move of the next color;
                # a None move leaves the board untouched for this turn
                color = next(colors)
                random_mover.color = color
                move = random_mover.get_move(board)
                if move is not None:
                    board.apply_move(move, color)

        elapsed = datetime.now() - started_at
        print("Generated %s training pairs form %s games in %s" %
              (len(samples), games, elapsed))
        return samples
Пример #4
0
    def test_Board_ApplyIllegalMove(self):
        """illegal_move stays None after a legal move and holds the offending color after an illegal one."""
        board = OthelloBoard()

        # Both moves are played by Black: first an accepted one, then a rejected one
        cases = (
            ((2, 3), None),
            ((1, 1), config.BLACK),
        )
        for position, expected_flag in cases:
            board.apply_move(position, config.BLACK)
            self.assertEqual(board.illegal_move, expected_flag)
Пример #5
0
    def test_Board_GameWon(self):
        """
        Checks game_won() in three situations: a completely filled board, a position in
        which nobody can move, and full games between two deterministic players.
        """

        # Case 1: Full board
        board = OthelloBoard()
        self.assertIsNone(board.game_won(), msg="Empty Board")
        opening = (
            ((2, 3), config.BLACK),
            ((2, 2), config.WHITE),
            ((2, 1), config.BLACK),
            ((1, 1), config.WHITE),
            ((5, 4), config.BLACK),
            ((5, 5), config.WHITE),
            ((5, 6), config.BLACK),
            ((6, 6), config.WHITE),
        )
        for move, color in opening:
            board.apply_move(move, color)
        self.assertIsNone(board.game_won(), msg="Game must still be running after eight moves")

        # Fill every remaining empty tile with a black stone
        size = len(board.board)
        for row in range(size):
            for col in range(size):
                if board.board[row, col] == config.EMPTY:
                    board.board[row, col] = config.BLACK

        board.legal_moves = {}  # This is required because moves were directly set to the board instead of using apply_move
        self.assertEqual(board.game_won(), config.BLACK, msg="Black wins by stone count")

        # Case 2: No valid moves
        board = OthelloBoard()
        board.apply_move((3, 2), config.BLACK)
        board.apply_move((4, 5), config.BLACK)
        self.assertEqual(board.game_won(), config.BLACK, msg="Black wins by stone count after no players could perform any legal moves")

        # Case 3: Regular, deterministic game
        for _ in range(32):
            game = Othello((DeterministicPlayer(), DeterministicPlayer()))
            game.run_simulations(1)
            winner = game.board.game_won()
            # The failure message names the color that is actually expected here
            self.assertEqual(winner, config.WHITE, "Winner of deterministic game was not White")
Пример #6
0
    def test_Board_applyValidMoves(self):
        """
        Plays a fixed sequence of moves and checks the valid-move sets around the first
        move as well as the complete board contents after every single move.
        """

        def dense(stones):
            # Expand a sparse {(row, col): value} mapping into an 8x8 nested list of floats
            return [[stones.get((row, col), 0.0) for col in range(8)] for row in range(8)]

        failure_msg = "Valid moves incorrect"

        board = OthelloBoard()
        self.assertEqual(board.get_valid_moves(config.BLACK), {(2, 3), (3, 2), (4, 5), (5, 4)}, msg=failure_msg)
        self.assertEqual(board.get_valid_moves(config.WHITE), {(2, 4), (4, 2), (3, 5), (5, 3)}, msg=failure_msg)

        board.apply_move((3, 2), config.BLACK)
        self.assertEqual(board.get_valid_moves(config.BLACK), {(4, 5), (5, 4), (5, 5)}, msg=failure_msg)
        self.assertEqual(board.get_valid_moves(config.WHITE), {(4, 2), (2, 4), (2, 2)}, msg=failure_msg)

        # Expected non-empty tiles after the first move; all other tiles are 0.0
        expected = {(3, 2): 1.0, (3, 3): 1.0, (3, 4): 1.0, (4, 3): 1.0, (4, 4): -1.0}
        self.assertTrue((board.board == dense(expected)).all())

        # Each step: (move, color, tiles whose expected value changes through that move).
        # Every move in this sequence places one stone and flips exactly one other stone.
        steps = (
            ((2, 2), config.WHITE, {(2, 2): -1.0, (3, 3): -1.0}),
            ((1, 2), config.BLACK, {(1, 2): 1.0, (2, 2): 1.0}),
            ((1, 1), config.WHITE, {(1, 1): -1.0, (2, 2): -1.0}),
            ((1, 0), config.BLACK, {(1, 0): 1.0, (1, 1): 1.0}),
            ((0, 0), config.WHITE, {(0, 0): -1.0, (1, 1): -1.0}),
            ((4, 5), config.BLACK, {(4, 5): 1.0, (4, 4): 1.0}),
            ((5, 5), config.WHITE, {(5, 5): -1.0, (4, 4): -1.0}),
            ((6, 5), config.BLACK, {(6, 5): 1.0, (5, 5): 1.0}),
            ((6, 6), config.WHITE, {(6, 6): -1.0, (5, 5): -1.0}),
            ((6, 7), config.BLACK, {(6, 7): 1.0, (6, 6): 1.0}),
            ((7, 7), config.WHITE, {(7, 7): -1.0, (6, 6): -1.0}),
        )
        for move, color, changed in steps:
            board.apply_move(move, color)
            expected.update(changed)
            self.assertTrue((board.board == dense(expected)).all())
Пример #7
0
    def test_Board_Representation(self):
        """
        Plays random moves on a board and, with swapped colors, on a board whose four
        center stones were swapped; the WHITE representation of the former must equal
        the latter after every move.
        """
        colors = OthelloBaseExperiment.AlternatingColorIterator()
        played = []
        mirrored = []

        # Center stones of the color-swapped starting position
        swapped_center = (
            ((3, 3), config.BLACK),
            ((3, 4), config.WHITE),
            ((4, 4), config.BLACK),
            ((4, 3), config.WHITE),
        )

        for _ in range(10):
            board = OthelloBoard()
            mirror = OthelloBoard()
            for tile, stone in swapped_center:
                mirror.board[tile] = stone

            for _ in range(30):
                color = next(colors)
                candidates = board.get_valid_moves(color)
                if not candidates:
                    # This color has to pass
                    continue

                move = random.choice(list(candidates))

                board.apply_move(move, color)
                played.append(board.copy())

                mirror.apply_move(move, board.other_color(color))
                mirrored.append(mirror.copy())

        for original, inverse in zip(played, mirrored):
            rep = original.get_representation(config.WHITE)
            self.assertTrue((rep == inverse.board).all(), msg="Inverting board failed")
class Othello(TwoPlayerGame):
    """
    Othello game between two players, runnable as single episodes or as a series of simulations.

    On construction the first player is assigned BLACK and the second WHITE; the color each
    player starts with is remembered in ``original_color``.
    """

    def __init__(self, players, gui=None):
        """
        :param players: The two participating players
        :param gui: Optional gui used to visualize the games; nothing is displayed if it is falsy
        """
        super(Othello, self).__init__(players=players, config=config, gui=gui)

        # NOTE(review): self.player1 / self.player2 are presumably set from players by TwoPlayerGame.__init__
        self.player1.color = config.BLACK
        self.player2.color = config.WHITE

        for player in players:
            player.original_color = player.color

    def __run__(self, player1, player2):
        """
        Runs an episode of the game

        The players take turns on a fresh board; a player without a valid move is skipped.

        :param player1: The player who gets the first turn
        :param player2: The player who gets the second turn
        :return: The value of config.get_label_from_winner_color(player1, player2, winner),
                 presumably the original color of the winning player
        """
        self.board = OthelloBoard()
        if self.gui:
            self.gui.show_game(self.board)

        current, waiting = player1, player2
        while True:
            if len(self.board.get_valid_moves(current.color)) > 0:
                # Players only receive a copy so they cannot modify the real board
                move = current.get_move(self.board.copy())
                self.board.apply_move(move, current.color)

                if self.gui:
                    self.gui.flash_move(move, current.color)
                    self.gui.update(self.board, waiting)

                winner = self.board.game_won()
                if winner is not None:
                    if self.gui:
                        self.gui.show_winner(winner, self.board)
                    return config.get_label_from_winner_color(
                        player1, player2, winner)

            # Pass the turn on, whether or not a move was made
            current, waiting = waiting, current

    def run_simulations(self,
                        episodes,
                        switch_colors=True,
                        switch_players=True):
        """
        Runs a number of games using the given players and returns statistics over all games run.

        If both :param switch_colors and :param switch_players are set, all four possible starting positions will be iterated through.
        After the last episode both players get their original colors back.

        :param episodes: The number of games to run
        :param switch_colors: Flag specifying whether to swap the players' colors before every second episode
        :param switch_players: Flag specifying whether to swap the starting player before every episode but the first
        :return: A tuple (results, losses): results holds the value returned by __run__ for each episode, losses is a flat list of all non-None values returned by the players' register_winner
        """

        simulation_players = [self.player1, self.player2]

        results = []
        losses = []

        for episode in range(episodes):
            # The first episode always uses the initial colors and starting order
            if episode != 0:
                if switch_colors and episode % 2 == 0:
                    first, second = simulation_players
                    first.color, second.color = second.color, first.color

                # Alternate starting player, effectively resulting in 4 starting positions rather than 2.
                # The former condition `episode + 1 % 2` parsed as `episode + (1 % 2)` and was
                # therefore always true. Reversing every episode is exactly what yields the
                # four-position cycle, so that behaviour is kept and stated explicitly.
                if switch_players:
                    simulation_players.reverse()

            winner = self.__run__(simulation_players[0], simulation_players[1])

            for player in simulation_players:
                loss = player.register_winner(winner)
                if loss is not None:
                    losses.append(loss)

            results.append(winner)

        for player in simulation_players:
            player.color = player.original_color

        return results, losses