Пример #1
0
 def test_BlockMid(self):
     player1 = ttt_players.ExperiencedPlayer(block_mid=True)
     player2 = ttt_players.ExperiencedPlayer(block_mid=False)
     simulation = TicTacToe([player1, player2])
     results, losses = simulation.run_simulations(self.TEST_EPISODES)
     print("Average Result Experienced(block) vs Experienced(): %s" %
           np.mean(results))
Пример #2
0
    def run(self, lr, milestones=False, silent=False):
        self.player1 = self.pretrained_player if self.pretrained_player else FCBaseLinePlayer(
            lr=lr)

        # Player 2 has the same start conditions as Player 1 but does not train
        self.player2 = self.player1.copy(shared_weights=True)
        self.player2.strategy.train = False
        self.simulation = TicTacToe([
            self.player1, self.player2
        ])  # Players never change, therefore we only need one simulation

        games_per_evaluation = self.games // self.evaluations
        start_time = datetime.now()
        for episode in range(1, self.evaluations + 1):
            # train
            self.player1.strategy.train, self.player1.strategy.model.training = True, True  # training mode

            # If milestones exist, use them with probability p
            if self.milestones and random() < 0.2:
                self.player2 = choice(self.milestones)
            else:
                self.player2 = self.player1.copy(shared_weights=True)

            # train
            self.player1.strategy.train, self.player1.strategy.model.training = True, True  # training mode
            self.player2.strategy.train = False

            results, losses = self.simulation.run_simulations(
                games_per_evaluation)
            self.add_loss(np.mean(losses))
            self.add_results(("Self", np.mean(results)))

            # evaluate
            self.player1.strategy.train, self.player1.strategy.model.training = False, False  # eval mode
            score, results, overview = evaluate_against_base_players(
                self.player1)
            self.add_results(results)

            if not silent and Printer.print_episode(
                    episode * games_per_evaluation, self.games,
                    datetime.now() - start_time):
                self.plot_and_save(
                    "%s vs SELF" % (self.player1.__str__() +
                                    (" milestones" if milestones else "")),
                    "Train %s vs Self version of self\nGames: %s Evaluations: %s\nTime: %s"
                    % (self.player1, episode * games_per_evaluation,
                       self.evaluations, config.time_diff(start_time)))

            # If x/5th of training is completed, save milestone
            if milestones and (self.games / episode *
                               games_per_evaluation) % 5 == 0:
                self.milestones.append(self.player1.copy(shared_weights=False))
                self.milestones[-1].strategy.train = False

        self.final_score, self.final_results, self.results_overview = evaluate_against_base_players(
            self.player1, silent=False)
        return self
def evaluate_against_base_players(player,
                                  evaluation_players=[
                                      RandomPlayer(),
                                      NovicePlayer(),
                                      ExperiencedPlayer(),
                                      ExpertPlayer()
                                  ],
                                  silent=True):
    """
    Standardized evaluation against base players.


    :param player: The player to be evaluated
    :param evaluation_players: A list of players against which the player should be evaluated
    :param silent: Flag controlling if output is written to console
    :return: a tuple (score, results) where score is the average score over all evaluation games (scalar (-1, 1)) and results is a list of
             tuples (name, score) where 'score' is the score (-1, 1) achieved when evaluated against the player named 'name'.
    """

    # Store original training values
    if issubclass(player.__class__, LearningPlayer):
        training_values = player.strategy.train, player.strategy.model.training
        player.strategy.train, player.strategy.model.training = False, False

    results = []
    for e_player in evaluation_players:
        simulation = TicTacToe([player, e_player])
        rewards, losses = simulation.run_simulations(config.EVALUATION_GAMES)
        results.append([e_player.__str__(), rewards])

        if not silent:
            print_results(player, e_player, rewards)

    # Restore original training values
    if issubclass(player.__class__, LearningPlayer):
        player.strategy.train, player.strategy.model.training = training_values

    avg_results = [(result[0], np.mean(result[1])) for result in results]
    avg_results.insert(
        0, ("Total Score", np.mean([
            res[1] for res in avg_results
        ])))  # Insert average overall score as first element of results

    results_overview = deepcopy(results)
    total = Counter(dict())
    for entry in results_overview:
        entry[1] = Counter(entry[1])
        total += entry[1]
    results_overview.insert(
        0, ("[Total Score]",
            total))  # Insert average overall score as first element of results

    if not silent:
        print("Overall score: %s" % avg_results[0][1])

    return avg_results[0][1], avg_results, results_overview
Пример #4
0
    def test_Performance(self):
        p1 = ttt_players.RandomPlayer()
        p2 = ttt_players.RandomPlayer()
        simulation = TicTacToe([p1, p2])
        N = 15000

        start = datetime.now()
        simulation.run_simulations(N)
        print("Simulating %s random games took %s" %
              (N, datetime.now() - start))
Пример #5
0
    def run(self, lr, silent=False):

        self.player1 = self.pretrained_player if self.pretrained_player else FCBaseLinePlayer(
            lr=lr)

        if self.opponent is not None:
            self.player2 = self.opponent
            self.simulation = TicTacToe([self.player1, self.player2])

        games_per_evaluation = self.games // self.evaluations
        start_time = datetime.now()
        for episode in range(1, self.evaluations + 1):

            if self.opponent is None:
                self.player2 = choice(
                    (RandomPlayer(), NovicePlayer(), ExperiencedPlayer()
                     ))  # choice((RandomPlayer(), ExpertPlayer()))
                self.simulation = TicTacToe([self.player1, self.player2])

            # train
            self.player1.strategy.train, self.player1.strategy.model.training = True, True  # training mode

            results, losses = self.simulation.run_simulations(
                games_per_evaluation)
            self.add_loss(np.mean(losses))
            self.add_results(("Training Results", np.mean(results)))

            # evaluate
            self.player1.strategy.train, self.player1.strategy.model.training = False, False  # eval mode
            if self.opponent is None:
                score, results, overview = evaluate_against_base_players(
                    self.player1)
            else:
                score, results, overview = evaluate_against_base_players(
                    self.player1, evaluation_players=[self.opponent])

            self.add_results(results)

            if not silent:
                if Printer.print_episode(episode * games_per_evaluation,
                                         self.games,
                                         datetime.now() - start_time):
                    overview = format_overview(overview)
                    self.plot_and_save(
                        "%s vs TRADITIONAL OPPONENT" % (self.player1),
                        "Train %s vs %s\nGames: %s Evaluations: %s\nTime: %s" %
                        (self.player1, self.opponent,
                         episode * games_per_evaluation, self.evaluations,
                         config.time_diff(start_time)))

        self.final_score, self.final_results, self.results_overview = evaluate_against_base_players(
            self.player1, silent=False)
        return self
Пример #6
0
class TrainReinforcePlayerVsBest(TicTacToeBaseExperiment):
    def __init__(self, games, evaluations, pretrained_player=None):
        super(TrainReinforcePlayerVsBest, self).__init__()
        self.games = games
        self.evaluations = evaluations
        self.pretrained_player = pretrained_player.copy(
            shared_weights=False) if pretrained_player else None

    def reset(self):
        self.__init__(games=self.games,
                      evaluations=self.evaluations,
                      pretrained_player=self.pretrained_player)
        return self

    def run(self, lr, silent=False):
        self.player1 = self.pretrained_player if self.pretrained_player else FCReinforcePlayer(
            lr=lr)

        # Player 2 has the same start conditions as Player 1 but does not train
        self.player2 = self.player1.copy(shared_weights=False)
        self.player2.strategy.train = False

        games_per_evaluation = self.games // self.evaluations
        self.replacements = []
        start_time = datetime.now()
        for episode in range(1, self.evaluations + 1):
            # train
            self.player1.strategy.train, self.player1.strategy.model.training = True, True  # training mode

            self.simulation = TicTacToe([self.player1, self.player2])
            results, losses = self.simulation.run_simulations(
                games_per_evaluation)
            self.add_results(("Losses", np.mean(losses)))

            # evaluate
            self.player1.strategy.train, self.player1.strategy.model.training = False, False  # eval mode
            score, results, overview = evaluate_against_base_players(
                self.player1)
            self.add_loss(np.mean(losses))
            self.add_results(results)

            if not silent and Printer.print_episode(
                    episode * games_per_evaluation, self.games,
                    datetime.now() - start_time):
                self.plot_and_save(
                    "%s vs BEST" % (self.player1),
                    "Train %s vs Best version of self\nGames: %s Evaluations: %s\nTime: %s"
                    % (self.player1, episode * games_per_evaluation,
                       self.evaluations, config.time_diff(start_time)))

            if evaluate_against_each_other(self.player1, self.player2):
                self.player2 = self.player1.copy(shared_weights=False)
                self.player2.strategy.train, self.player2.strategy.model.training = False, False
                self.replacements.append(episode)

        print("Best player replaced after episodes: %s" % self.replacements)
        self.final_score, self.final_results, self.results_overview = evaluate_against_base_players(
            self.player1, silent=False)
        return self
Пример #7
0
    def testRandomPlayer(self):
        player1 = ttt_players.RandomPlayer()
        player2 = ttt_players.RandomPlayer()
        simulation = TicTacToe([player1, player2])
        results, losses = simulation.run_simulations(self.TEST_EPISODES)
        self.assertTrue(len(results) == self.TEST_EPISODES)
        self.assertTrue(None not in results)

        for i in range(4):
            simulation.__run__(player1, player2)
            black_stones, white_stones = simulation.board.count_stones()
            self.assertIn(black_stones,
                          [white_stones - 1, white_stones, white_stones + 1])
            if not simulation.board.game_won():
                self.assertEqual(black_stones + white_stones,
                                 simulation.board.board_size**2)
        print("Average Result Random vs Random: %s" % np.mean(results))
    def test_neverLose(self):
        GAMES = 10000

        player1 = SearchPlayer()
        player2 = SearchPlayer()
        random_player = RandomPlayer()

        simulation = TicTacToe([player1, player2])
        results, losses = simulation.run_simulations(GAMES // 100)
        self.assertEqual(
            len(results), results.count(0),
            "Perfect player mirror match resulted in a result other than draw")
        print("\nFirst 20 results: %s against self" % results[:20])

        simulation = TicTacToe([player1, random_player])
        results, losses = simulation.run_simulations(GAMES)
        self.assertEqual(0, results.count(-1),
                         "Perfect player lost against random")
        print("First 20 results: %s against random player" % results[:20])
        print("Win rate: %s vs random player" % (sum(results) / len(results)))
Пример #9
0
class TrainBaselinePlayerVsBest(TicTacToeBaseExperiment):
    def __init__(self, games, evaluations, pretrained_player=None):
        super(TrainBaselinePlayerVsBest, self).__init__()
        self.games = games
        self.evaluations = evaluations
        self.pretrained_player = pretrained_player.copy(
            shared_weights=False) if pretrained_player else None
        self.milestones = []

    def reset(self):
        self.__init__(games=self.games,
                      evaluations=self.evaluations,
                      pretrained_player=self.pretrained_player)
        return self

    def run(self, lr, milestones=False, silent=False):
        self.player1 = self.pretrained_player if self.pretrained_player else FCBaseLinePlayer(
            lr=lr)

        # Player 2 has the same start conditions as Player 1 but does not train
        self.player2 = self.player1.copy(shared_weights=False)
        self.player2.strategy.train, self.player2.strategy.model.training = False, False  # eval mode

        games_per_evaluation = self.games // self.evaluations
        self.replacements = []
        start_time = datetime.now()
        for episode in range(1, self.evaluations + 1):
            # train
            self.player1.strategy.train, self.player1.strategy.model.training = True, True  # training mode

            # If milestones exist, use them with probability p
            if self.milestones and random() < 0.2:
                self.player2 = choice(self.milestones)

            self.simulation = TicTacToe([self.player1, self.player2])
            results, losses = self.simulation.run_simulations(
                games_per_evaluation)
            self.add_loss(np.mean(losses))
            self.add_results(("Best", np.mean(results)))

            # evaluate
            self.player1.strategy.train, self.player1.strategy.model.training = False, False  # eval mode
            score, results, overview = evaluate_against_base_players(
                self.player1)
            self.add_results(results)

            if not silent and Printer.print_episode(
                    episode * games_per_evaluation, self.games,
                    datetime.now() - start_time):
                self.plot_and_save(
                    "%s vs BEST" % (self.player1.__str__() +
                                    (" milestones" if milestones else "")),
                    "Train %s vs Best version of self\nGames: %s Evaluations: %s\nTime: %s"
                    % (self.player1, episode * games_per_evaluation,
                       self.evaluations, config.time_diff(start_time)))

            if evaluate_against_each_other(self.player1, self.player2):
                self.player2 = self.player1.copy(shared_weights=False)
                self.player2.strategy.train, self.player2.strategy.model.training = False, False
                self.replacements.append(episode)

            # If x/5th of training is completed, save milestone
            if milestones and (self.games / episode *
                               games_per_evaluation) % 5 == 0:
                self.milestones.append(self.player1.copy(shared_weights=False))
                self.milestones[-1].strategy.train = False

        print("Best player replaced after episodes: %s" % self.replacements)
        self.final_score, self.final_results, self.results_overview = evaluate_against_base_players(
            self.player1, silent=False)
        return self
class TrainReinforcePlayerVsTraditionalOpponent(TicTacToeBaseExperiment):
    def __init__(self, games, evaluations, pretrained_player, opponent):
        super(TrainReinforcePlayerVsTraditionalOpponent, self).__init__()
        self.games = games
        self.evaluations = evaluations
        self.pretrained_player = pretrained_player.copy(
            shared_weights=False) if pretrained_player else None
        self.opponent = opponent
        self.milestones = []

    def reset(self):
        self.__init__(games=self.games,
                      evaluations=self.evaluations,
                      pretrained_player=self.pretrained_player,
                      opponent=self.opponent)
        return self

    def run(self, lr, silent=False):

        self.player1 = self.pretrained_player if self.pretrained_player else FCReinforcePlayer(
            lr=lr)

        if self.opponent is not None:
            self.player2 = self.opponent
            self.simulation = TicTacToe([self.player1, self.player2])

        games_per_evaluation = self.games // self.evaluations
        start_time = datetime.now()
        for episode in range(1, self.evaluations + 1):

            if self.opponent is None:
                self.player2 = choice(
                    (RandomPlayer(), NovicePlayer(), ExperiencedPlayer()
                     ))  # choice((RandomPlayer(), ExpertPlayer()))
                self.simulation = TicTacToe([self.player1, self.player2])

            # train
            self.player1.strategy.train, self.player1.strategy.model.training = True, True  # training mode

            results, losses = self.simulation.run_simulations(
                games_per_evaluation)
            self.add_loss(np.mean(losses))
            self.add_results(("Training Results", np.mean(results)))

            # evaluate
            self.player1.strategy.train, self.player1.strategy.model.training = False, False  # eval mode
            if self.opponent is None:
                score, results, overview = evaluate_against_base_players(
                    self.player1)
            else:
                score, results, overview = evaluate_against_base_players(
                    self.player1, evaluation_players=[self.opponent])
            self.add_results(results)

            if not silent:
                if Printer.print_episode(episode * games_per_evaluation,
                                         self.games,
                                         datetime.now() - start_time):
                    self.plot_and_save(
                        "%s vs TRADITIONAL OPPONENT" % (self.player1),
                        "Train %s vs %s\nGames: %s Evaluations: %s\nTime: %s" %
                        (self.player1, self.opponent,
                         episode * games_per_evaluation, self.evaluations,
                         config.time_diff(start_time)))

        self.final_score, self.final_results, self.results_overview = evaluate_against_base_players(
            self.player1, silent=False)
        return self
Пример #11
0
    def test_ConvReinforcePlayer(self):
        fc_player = FCReinforcePlayer(lr=1e-4)
        random_player = RandomPlayer()

        simulation = TicTacToe([fc_player, random_player])
        simulation.run_simulations(100)
Пример #12
0
    def test_DummyTrainReinforcePlayer(self):
        player1 = FCReinforcePlayer(lr=0.001)
        player2 = RandomPlayer()

        simulation = TicTacToe([player1, player2])
        simulation.run_simulations(10)
Пример #13
0
 def test_ExperiencedVsRandom(self):
     player1 = ttt_players.ExperiencedPlayer()
     player2 = ttt_players.RandomPlayer()
     simulation = TicTacToe([player1, player2])
     results, losses = simulation.run_simulations(self.TEST_EPISODES)
     print("Average Result Experienced vs Random: %s" % np.mean(results))