def __run__(self, player1, player2):
    """
    Plays one complete episode on a fresh board, with player1 moving first.

    :param player1: The player who takes the first turn
    :param player2: The player who takes the second turn
    :return: The label computed by config.get_label_from_winner_color for the winner
    """
    self.board = OthelloBoard()
    active, waiting = player1, player2

    if self.gui:
        self.gui.show_game(self.board)

    winner = None
    while winner is None:
        # A player without any legal move simply skips the turn.
        if self.board.get_valid_moves(active.color):
            chosen = active.get_move(self.board.copy())
            self.board.apply_move(chosen, active.color)
            if self.gui:
                self.gui.flash_move(chosen, active.color)
                self.gui.update(self.board, waiting)

        winner = self.board.game_won()
        # Hand the turn to the other player for the next round.
        active, waiting = waiting, active

    if self.gui:
        self.gui.show_winner(winner, self.board)
    return config.get_label_from_winner_color(player1, player2, winner)
def test_Board_CountStones(self):
    """Stone counts are (2, 2) on a fresh board and (4, 4) after four opening moves."""
    board = OthelloBoard()
    self.assertEqual((2, 2), board.count_stones())

    # Moves are replayed in order, alternating between Black and White.
    opening = (
        ((2, 3), config.BLACK),
        ((2, 2), config.WHITE),
        ((2, 1), config.BLACK),
        ((1, 1), config.WHITE),
    )
    for square, color in opening:
        board.apply_move(square, color)

    self.assertEqual((4, 4), board.count_stones())
def generate_supervised_training_data(cls, games, labeling_strategy):
    """
    Generates training data by applying random moves to a board and labeling each visited
    position with the move that labeling_strategy would have taken given that board.

    :param games: The number of games to be simulated
    :param labeling_strategy: The strategy used to label each sample. Its color is set to
        cls.config.BLACK and the label equals labeling_strategy.get_move(board)
    :return: a list of tuples (board_sample, move_label)
    """
    labeling_strategy.color = cls.config.BLACK

    generator = RandomPlayer()
    # The iterator is created once and shared by all games, so the color that moves first
    # in a game depends on where the previous game stopped.
    color_iterator = OthelloBaseExperiment.AlternatingColorIterator()

    start = datetime.now()
    training_set = []
    for _ in range(games):
        board = OthelloBoard()
        while board.game_won() is None:
            # generate training pair; the sample stores a copy because the board keeps changing
            expert_move = labeling_strategy.get_move(board)
            training_set.append((board.copy(), expert_move))

            # prepare for next sample
            color = next(color_iterator)
            generator.color = color
            move = generator.get_move(board)
            # The generator returns None when it has no move; the turn is then skipped.
            if move is not None:
                board.apply_move(move, color)

    print("Generated %s training pairs from %s games in %s" % (len(training_set), games, datetime.now() - start))
    return training_set
def test_Board_ApplyIllegalMove(self):
    """Check that ``illegal_move`` stays unset after a legal move and records the color after an illegal one."""
    board = OthelloBoard()

    # (2, 3) is one of Black's valid opening moves, so nothing may be flagged.
    board.apply_move((2, 3), config.BLACK)
    self.assertIsNone(board.illegal_move)

    # The test expects (1, 1) to be rejected and the offending color to be recorded.
    board.apply_move((1, 1), config.BLACK)
    self.assertEqual(board.illegal_move, config.BLACK)
def test_Board_GameWon(self):
    """
    Checks OthelloBoard.game_won() in three situations: a board that gets filled up,
    a position where no legal move is left, and complete games between two deterministic players.
    """
    # Case 1: Full board
    board = OthelloBoard()
    self.assertIsNone(board.game_won(), msg="Empty Board")

    opening = (
        ((2, 3), config.BLACK),
        ((2, 2), config.WHITE),
        ((2, 1), config.BLACK),
        ((1, 1), config.WHITE),
        ((5, 4), config.BLACK),
        ((5, 5), config.WHITE),
        ((5, 6), config.BLACK),
        ((6, 6), config.WHITE),
    )
    for move, color in opening:
        board.apply_move(move, color)
    self.assertIsNone(board.game_won(), msg="Game reported as won after only eight moves")

    # Fill every remaining empty tile with a black stone in one masked assignment.
    board.board[board.board == config.EMPTY] = config.BLACK
    board.legal_moves = {}  # This is required because moves were directly set to the board instead of using apply_move
    self.assertEqual(board.game_won(), config.BLACK, msg="Black wins by stone count")

    # Case 2: No valid moves
    board = OthelloBoard()
    board.apply_move((3, 2), config.BLACK)
    board.apply_move((4, 5), config.BLACK)
    self.assertEqual(board.game_won(), config.BLACK, msg="Black wins by stone count after no players could perform any legal moves")

    # Case 3: Regular, deterministic game
    for _ in range(32):
        game = Othello((DeterministicPlayer(), DeterministicPlayer()))
        game.run_simulations(1)
        winner = game.board.game_won()
        self.assertEqual(winner, config.WHITE, "Winner of deterministic game was not White")
def test_Board_applyValidMoves(self):
    """
    Replays a fixed sequence of moves and compares the board grid against the expected
    position after every move; also checks the valid-move sets before and after the first move.
    """
    board = OthelloBoard()

    # Valid moves of both colors on the untouched board.
    self.assertEqual(board.get_valid_moves(config.BLACK), {(2, 3), (3, 2), (4, 5), (5, 4)}, msg="Valid moves incorrect")
    self.assertEqual(board.get_valid_moves(config.WHITE), {(2, 4), (4, 2), (3, 5), (5, 3)}, msg="Valid moves incorrect")

    # Valid moves of both colors after Black opened on (3, 2).
    board.apply_move((3, 2), config.BLACK)
    self.assertEqual(board.get_valid_moves(config.BLACK), {(4, 5), (5, 4), (5, 5)}, msg="Valid moves incorrect")
    self.assertEqual(board.get_valid_moves(config.WHITE), {(4, 2), (2, 4), (2, 2)}, msg="Valid moves incorrect")

    # Expected grid after the opening move: three 1.0 stones in row 3, one 1.0 and one -1.0 in row 4.
    expected = [[0.0] * 8 for _ in range(8)]
    expected[3][2:5] = [1.0, 1.0, 1.0]
    expected[4][3:5] = [1.0, -1.0]
    self.assertTrue((board.board == expected).all())

    # Every remaining move places one stone and turns over exactly one other stone, so each
    # step is described as (move, color to play, value written to the grid, square turned over).
    script = (
        ((2, 2), config.WHITE, -1.0, (3, 3)),
        ((1, 2), config.BLACK, 1.0, (2, 2)),
        ((1, 1), config.WHITE, -1.0, (2, 2)),
        ((1, 0), config.BLACK, 1.0, (1, 1)),
        ((0, 0), config.WHITE, -1.0, (1, 1)),
        ((4, 5), config.BLACK, 1.0, (4, 4)),
        ((5, 5), config.WHITE, -1.0, (4, 4)),
        ((6, 5), config.BLACK, 1.0, (5, 5)),
        ((6, 6), config.WHITE, -1.0, (5, 5)),
        ((6, 7), config.BLACK, 1.0, (6, 6)),
        ((7, 7), config.WHITE, -1.0, (6, 6)),
    )
    for move, color, stone, turned in script:
        board.apply_move(move, color)
        for row, col in (move, turned):
            expected[row][col] = stone
        self.assertTrue((board.board == expected).all())
def test_Board_Representation(self):
    """
    Plays random moves on a board and, with swapped colors, on a color-inverted twin board,
    then checks that get_representation(config.WHITE) of each snapshot equals the twin's grid.
    """
    colors = OthelloBaseExperiment.AlternatingColorIterator()
    snapshots = []  # pairs of (board copy, inverted board copy) taken after the same move

    for _game in range(10):
        board = OthelloBoard()

        # Twin board whose four center stones are written with the colors listed here.
        twin = OthelloBoard()
        twin.board[3, 3] = config.BLACK
        twin.board[3, 4] = config.WHITE
        twin.board[4, 4] = config.BLACK
        twin.board[4, 3] = config.WHITE

        for _turn in range(30):
            color = next(colors)
            candidates = board.get_valid_moves(color)
            if not candidates:
                # No legal move for this color: nothing is played or recorded this turn.
                continue

            move = random.choice(list(candidates))
            board.apply_move(move, color)
            regular = board.copy()
            twin.apply_move(move, board.other_color(color))
            snapshots.append((regular, twin.copy()))

    for regular, inverted in snapshots:
        rep = regular.get_representation(config.WHITE)
        self.assertTrue((rep == inverted.board).all(), msg="Inverting board failed")
class Othello(TwoPlayerGame):
    """
    Othello game between two players with an optional gui.

    On construction player1 is assigned config.BLACK and player2 config.WHITE; that initial
    assignment is stored on every player as original_color.
    """

    def __init__(self, players, gui=None):
        """
        :param players: The two players taking part in the game
        :param gui: Optional gui object used to display the game; no display if None
        """
        super(Othello, self).__init__(players=players, config=config, gui=gui)

        self.player1.color = config.BLACK
        self.player2.color = config.WHITE

        # Remember the initial colors so they can be restored after colors were switched.
        for player in players:
            player.original_color = player.color

    def __run__(self, player1, player2):
        """
        Runs an episode of the game on a fresh board; player1 takes the first turn.

        :param player1: The player who moves first
        :param player2: The player who moves second
        :return: The label computed by config.get_label_from_winner_color(player1, player2, winner)
        """
        self.board = OthelloBoard()
        players = player1, player2

        if self.gui:
            self.gui.show_game(self.board)

        while True:
            # A player without a legal move skips the turn.
            if self.board.get_valid_moves(players[0].color):
                move = players[0].get_move(self.board.copy())
                self.board.apply_move(move, players[0].color)
                if self.gui:
                    self.gui.flash_move(move, players[0].color)
                    self.gui.update(self.board, players[1])

            winner = self.board.game_won()
            if winner is not None:
                if self.gui:
                    self.gui.show_winner(winner, self.board)
                return config.get_label_from_winner_color(player1, player2, winner)

            # Pass the turn to the other player.
            players = list(reversed(players))

    def run_simulations(self, episodes, switch_colors=True, switch_players=True):
        """
        Runs a number of games using the given players and returns statistics over all games run.

        If both switch_colors and switch_players are set, all four possible starting positions
        will be iterated through.

        :param episodes: The number of games to run
        :param switch_colors: Flag specifying whether to swap the players' colors on every even episode after the first
        :param switch_players: Flag specifying whether to swap the starting player before every episode after the first
        :return: A tuple (results, losses). results holds the value returned by __run__ for each episode,
                 losses holds every non-None value returned by the players' register_winner calls
        """
        simulation_players = [self.player1, self.player2]

        results = []
        losses = []

        for episode in range(episodes):
            if switch_colors and episode != 0 and episode % 2 == 0:
                # Alternate starting color, effectively resulting in 4 starting positions rather than 2.
                simulation_players[0].color, simulation_players[1].color = (
                    simulation_players[1].color,
                    simulation_players[0].color,
                )

            # The starting player changes before every episode except the first one.
            # NOTE: this condition used to end in `and episode + 1 % 2`. Because `%` binds tighter
            # than `+`, that term evaluated to `episode + 1`, which is always truthy, so it never
            # had any effect and was removed. Do not replace it with `(episode + 1) % 2`: together
            # with the color switch above this would reduce the four starting positions to two.
            if switch_players and episode != 0:
                simulation_players.reverse()

            winner = self.__run__(simulation_players[0], simulation_players[1])

            # Every player is informed about the result; players may report a loss value.
            for player in simulation_players:
                loss = player.register_winner(winner)
                if loss is not None:
                    losses.append(loss)

            results.append(winner)

        # Restore the colors the players had when the game object was created.
        for player in simulation_players:
            player.color = player.original_color

        return results, losses