Example #1
    def __run_episode__(self, generator):
        player = self.player

        rewards = []
        color_iterator = self.AlternatingColorIterator()
        board = TicTacToeBoard()
        for i in range(9):
            player_move = player.get_move(board)

            # Win if predicted move is legal, loss otherwise
            reward = (config.LABEL_WIN
                      if player_move in board.get_valid_moves(player.color)
                      else config.LABEL_LOSS)
            rewards.append(reward)

            # prepare for next sample
            board.apply_move(generator.get_move(board),
                             next(color_iterator))

        loss = player.strategy.update()
        player.strategy.rewards = []

        average_reward = np.mean(rewards)
        del rewards[:]
        self.add_results([("Losses", loss), ("Score", average_reward)])

        return loss, average_reward
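Several of the snippets on this page rely on an AlternatingColorIterator helper that is not shown here. A minimal sketch of what it plausibly looks like, assuming it simply yields config.BLACK and config.WHITE in alternation starting with black (an assumption, not the source implementation):

import itertools

class AlternatingColorIterator:
    """Endlessly alternate between the two stone colors (sketch only)."""

    def __init__(self):
        # BLACK moves first in the snippets above, so the cycle starts with it.
        self.colors = itertools.cycle([config.BLACK, config.WHITE])

    def __iter__(self):
        return self

    def __next__(self):
        return next(self.colors)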
Example #2
    def generate_supervised_training_data(cls, games, labeling_strategy):
        """
        Generates training data by applying random moves to a board and labeling each sample with the move that :param labeling_strategy would have taken given the board.

        :param games: The number of games to be simulated
        :param labeling_strategy: The strategy used to label each sample. The label equals labeling_strategy.get_move(board)
        :return: a list of tuples(board_sample, move_label)
        """

        labeling_strategy.color = cls.config.BLACK

        generator = RandomPlayer()
        color_iterator = TicTacToeBaseExperiment.AlternatingColorIterator()

        start = datetime.now()
        training_set = []
        for game in range(games):
            board = TicTacToeBoard()
            for i in range(9):
                # generate training pair
                expert_move = labeling_strategy.get_move(board)
                training_set.append((board.copy(), expert_move))

                # prepare for next sample
                move = generator.get_move(board)
                board.apply_move(move, next(color_iterator))

        print("Generated %s training pairs form %s games in %s" % (len(training_set), games, datetime.now() - start))
        return training_set
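A hypothetical call to this classmethod, reusing the ExperiencedPlayer labeling strategy that appears in later snippets (the call site itself is illustrative, not from the source):

labeled = TicTacToeBaseExperiment.generate_supervised_training_data(
    games=10,
    labeling_strategy=ExperiencedPlayer(deterministic=True, block_mid=True))
board_sample, move_label = labeled[0]  # each entry is a (board copy, expert move) pair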
Example #3
    def test_Board_ApplyValidMoves(self):
        board = TicTacToeBoard()
        if config.BOARD_SIZE == 3:
            self.assertEqual(set(board.get_valid_moves(config.BLACK)),
                             set([(0, 0), (0, 1), (0, 2), (1, 0), (1, 1),
                                  (1, 2), (2, 0), (2, 1), (2, 2)]),
                             msg="Valid moves incorrect")
            self.assertEqual(set(board.get_valid_moves(config.WHITE)),
                             set([(0, 0), (0, 1), (0, 2), (1, 0), (1, 1),
                                  (1, 2), (2, 0), (2, 1), (2, 2)]),
                             msg="Valid moves incorrect")
        else:
            self.assertEqual(config.BOARD_SIZE**2,
                             len(board.get_valid_moves(config.BLACK)),
                             msg="Incorrect Number of valid moves")
        board.apply_move((1, 1), config.BLACK)

        if config.BOARD_SIZE == 3:
            self.assertEqual(set(board.get_valid_moves(config.BLACK)),
                             set([(0, 0), (0, 1), (0, 2), (1, 0), (1, 2),
                                  (2, 0), (2, 1), (2, 2)]),
                             msg="Valid moves incorrect")
            self.assertEqual(set(board.get_valid_moves(config.WHITE)),
                             set([(0, 0), (0, 1), (0, 2), (1, 0), (1, 2),
                                  (2, 0), (2, 1), (2, 2)]),
                             msg="Valid moves incorrect")
Example #4
    def test_Board_CountStones(self):
        board = TicTacToeBoard()
        board.apply_move((0, 0), config.BLACK)
        board.apply_move((1, 1), config.WHITE)
        board.apply_move((2, 2), config.BLACK)

        board.apply_move((1, 2), config.WHITE)
        board.apply_move((1, 0), config.BLACK)

        self.assertEqual((3, 2), board.count_stones())
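The assertion suggests count_stones returns a (black, white) tuple; three black and two white moves were played above. A sketch under the same 2D-array assumption:

    def count_stones(self):
        # Returns (number of black stones, number of white stones).
        cells = [cell for row in self.board for cell in row]
        return cells.count(config.BLACK), cells.count(config.WHITE)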
Example #5
    def test_DummyUpdate(self):
        board = TicTacToeBoard()
        value_function = PGStrategy(lr=0.001, weight_decay=0.003)
        value_function.evaluate(board.board, board.get_legal_moves_map(config.BLACK))

        random_player = RandomPlayer()
        move = random_player.get_move(board)
        board.apply_move(move, config.BLACK)
        value_function.evaluate(board.board, board.get_legal_moves_map(config.BLACK))

        move = random_player.get_move(board)
        board.apply_move(move, config.WHITE)
        value_function.evaluate(board.board, board.get_legal_moves_map(config.BLACK))
Example #6
    def test_Board_ApplyIllegalMove(self):
        board = TicTacToeBoard()
        board.apply_move((1, 1), config.BLACK)
        self.assertIsNone(board.illegal_move)

        board.apply_move((1, 1), config.BLACK)
        self.assertEqual(board.illegal_move, config.BLACK)
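The test suggests apply_move records the offending color in an illegal_move attribute (initially None) instead of raising. A sketch of that bookkeeping, again assuming the 2D-array representation:

    def apply_move(self, move, color):
        i, j = move
        if self.board[i][j] != config.EMPTY:
            # Remember who attempted the illegal move; the game loop can
            # then score it, e.g. as an immediate loss.
            self.illegal_move = color
        else:
            self.board[i][j] = color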
Example #7
class TicTacToe(TwoPlayerGame):
    def __init__(self, players):
        super(TicTacToe, self).__init__(players=players, config=config)

        self.player1.color = config.BLACK
        self.player2.color = config.WHITE

        for player in players:
            player.original_color = player.color

    def __run__(self, player1, player2):
        """
        Runs an episode of the game

        :param player1:
        :param player2:
        :return: The original color of the winning player
        """
        self.board = TicTacToeBoard()
        players = player1, player2

        while True:
            move = players[0].get_move(self.board.copy())
            self.board.apply_move(move, players[0].color)

            winner = self.board.game_won()
            if winner is not None:
                return config.get_label_from_winner_color(
                    player1, player2, winner)

            players = list(reversed(players))

    def run_simulations(self,
                        episodes,
                        switch_colors=True,
                        switch_players=True):
        """
        Runs a number of games using the given players and returns statistics over all games run.


        If both :param switch_colors and :param switch_players are set, all four possible starting positions will iterated through.
        :param episodes: The number of games to run
        :param switch_colors: Flag specifying whether to alternate the players colors during play
        :param switch_players: Flag specifying whether to alternate the starting player
        :return: The results and average losses per episode where results is a list of the original colors of the winning player ([original_winning_color])
        """

        simulation_players = [self.player1, self.player2]

        results = []
        losses = []

        for episode in range(episodes):
            if switch_colors and episode != 0 and episode % 2 == 0:
                simulation_players[0].color, simulation_players[1].color = \
                    simulation_players[1].color, simulation_players[0].color

            if switch_players and episode % 2 == 1:
                # Swap the starting player on odd episodes; together with the
                # color swap this cycles through all four starting configurations.
                simulation_players = list(reversed(simulation_players))

            winner = self.__run__(simulation_players[0], simulation_players[1])
            player_losses = []
            for player in simulation_players:
                loss = player.register_winner(winner)
                if loss is not None:
                    player_losses.append(loss)

            losses += player_losses
            results.append(winner)

        for player in simulation_players:
            player.color = player.original_color

        return results, losses
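A hypothetical driver for this class, combining players that appear elsewhere on this page (only the constructor and method signatures are taken from the snippets; the rest is illustrative):

players = [ExperiencedPlayer(deterministic=True, block_mid=True), RandomPlayer()]
game = TicTacToe(players)
results, losses = game.run_simulations(episodes=100,
                                       switch_colors=True,
                                       switch_players=True)
print("Average loss over %s reports: %s"
      % (len(losses), sum(losses) / max(len(losses), 1)))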
Example #8
    def test_DummyForwardPass(self):
        board = TicTacToeBoard()
        value_function = PGStrategy(lr=0.001, weight_decay=0.003)
        value_function.evaluate(board.board, board.get_legal_moves_map(config.BLACK))
Example #9
    def test_Board_Representation(self):
        random_player = ttt_players.RandomPlayer()
        boards = []
        inverses = []
        for i in range(100):
            board = TicTacToeBoard()
            inverse_board = TicTacToeBoard()
            for j in range(9):
                move = random_player.get_move(board)
                color = random.choice((config.BLACK, config.WHITE))

                board.apply_move(move, color)
                boards.append(board.copy())

                inverse_board.apply_move(move, board.other_color(color))
                inverses.append(inverse_board.copy())

        for i in range(len(boards)):
            rep = boards[i].get_representation(config.WHITE)
            self.assertTrue((rep == inverses[i].board).all(),
                            msg="Inverting board failed")
Example #10
    def test_Board_GameWon(self):
        board = TicTacToeBoard()
        self.assertFalse(board.game_won(), msg="Empty Board")
        board.apply_move((0, 0), config.BLACK)
        board.apply_move((1, 1), config.WHITE)
        board.apply_move((1, 0), config.BLACK)
        board.apply_move((2, 2), config.WHITE)
        self.assertFalse(board.game_won(), msg="No Winner yet")
        board.apply_move((2, 0), config.BLACK)
        self.assertEqual(board.game_won(), config.BLACK, msg="Black Won")
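A sketch of the row/column/diagonal check game_won performs, returning the winner's color or None while the game is open (consistent with the falsy result asserted above; the line construction is illustrative):

    def game_won(self):
        n = config.BOARD_SIZE
        lines = [[(i, j) for j in range(n)] for i in range(n)]   # rows
        lines += [[(i, j) for i in range(n)] for j in range(n)]  # columns
        lines.append([(i, i) for i in range(n)])                 # main diagonal
        lines.append([(i, n - 1 - i) for i in range(n)])         # anti-diagonal
        for line in lines:
            first = self.board[line[0][0]][line[0][1]]
            if first != config.EMPTY and all(self.board[i][j] == first
                                             for i, j in line):
                return first
        return None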
Example #11
    def test_getAfterstates(self):
        board = TicTacToeBoard()
        self.assertEqual([(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2),
                          (2, 0), (2, 1), (2, 2)],
                         [a[1] for a in board.get_afterstates(config.BLACK)])
        self.assertEqual([(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2),
                          (2, 0), (2, 1), (2, 2)],
                         [a[1] for a in board.get_afterstates(config.WHITE)])

        board.apply_move((2, 2), config.BLACK)
        self.assertEqual([(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2),
                          (2, 0), (2, 1)],
                         [a[1] for a in board.get_afterstates(config.BLACK)])
        self.assertEqual([(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2),
                          (2, 0), (2, 1)],
                         [a[1] for a in board.get_afterstates(config.WHITE)])

        board.apply_move((2, 1), config.WHITE)
        self.assertEqual([(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2),
                          (2, 0)],
                         [a[1] for a in board.get_afterstates(config.BLACK)])
        self.assertEqual([(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2),
                          (2, 0)],
                         [a[1] for a in board.get_afterstates(config.WHITE)])

        board.apply_move((1, 1), config.BLACK)
        self.assertEqual([(0, 0), (0, 1), (0, 2), (1, 0), (1, 2), (2, 0)],
                         [a[1] for a in board.get_afterstates(config.BLACK)])
        self.assertEqual([(0, 0), (0, 1), (0, 2), (1, 0), (1, 2), (2, 0)],
                         [a[1] for a in board.get_afterstates(config.WHITE)])
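Judging from the a[1] indexing in the assertions, get_afterstates(color) yields one (successor_board, move) pair per legal move, in the same order as get_valid_moves. A sketch consistent with that reading:

    def get_afterstates(self, color):
        # One (resulting board, move) pair for every legal move.
        afterstates = []
        for move in self.get_valid_moves(color):
            successor = self.copy()
            successor.apply_move(move, color)
            afterstates.append((successor, move))
        return afterstates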
Example #12
    def run(self, lr, silent=False):

        EVALUATION_GAMES = 10

        player = FCReinforcePlayer(lr=lr)
        player.color = config.BLACK

        expert = ExperiencedPlayer(deterministic=True, block_mid=True)
        expert.color = config.BLACK

        generator = RandomPlayer()
        color_iterator = self.AlternatingColorIterator()

        validation_set = self.generate_supervised_training_data(
            EVALUATION_GAMES,
            ExperiencedPlayer(deterministic=True, block_mid=True))

        print("Training ReinforcedPlayer supervised continuously with LR: %s" %
              lr)
        start = datetime.now()
        for game in range(self.games):
            rewards = []
            board = TicTacToeBoard()

            for i in range(9):
                expert_move = expert.get_move(board)
                player_move = player.get_move(board)

                reward = config.LABEL_WIN if expert_move == player_move else config.LABEL_LOSS
                rewards.append(reward)

                # prepare for next sample
                move = generator.get_move(board)
                board.apply_move(move, next(color_iterator))

            average_reward = sum(rewards) / len(rewards)
            player.strategy.rewards = rewards
            loss = player.strategy.update()

            del rewards[:]
            self.add_results([("Losses", loss), ("Reward", average_reward)])

            if game % self.evaluation_period == 0:
                test_rewards = []
                for board, expert_move in validation_set:
                    # Evaluation mode
                    player.strategy.train, player.strategy.model.training = False, False
                    strategy_move = player.get_move(board)
                    player.strategy.train, player.strategy.model.training = True, True

                    test_reward = config.LABEL_WIN if expert_move == strategy_move else config.LABEL_LOSS
                    test_rewards.append(test_reward)

                average_test_reward = sum(test_rewards) / len(test_rewards)
                del test_rewards[:]
                self.add_results(("Test reward", average_test_reward))

            if not silent:
                if Printer.print_episode(game + 1, self.games,
                                         datetime.now() - start):
                    plot_name = "Supervised Continuous training of %s" % (
                        player)
                    plot_info = "%s Games - Final reward: %s \nTime: %s" % (
                        game + 1, average_reward, config.time_diff(start))
                    self.plot_and_save(plot_name, plot_name + "\n" + plot_info)

        return average_reward
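The manual flag flipping around the evaluation block is fragile if an exception interrupts it; one way to make it robust is a small context manager that touches only the attributes the snippet itself uses (a suggestion, not part of the source):

from contextlib import contextmanager

@contextmanager
def evaluation_mode(strategy):
    # Temporarily put the strategy and its model into evaluation mode,
    # restoring the training flags even if the body raises.
    strategy.train, strategy.model.training = False, False
    try:
        yield
    finally:
        strategy.train, strategy.model.training = True, True

The evaluation loop body then shrinks to a single with evaluation_mode(player.strategy): block around player.get_move(board).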