Пример #1
0
 def run_episode(self):
     """Play one self-play game and return the examples recorded during it.

     Both sides are ``NNPlayer`` instances built with the same settings
     (``self.n_simulations``, ``self.predictor``, identical temperature
     arguments) and the same initial ``MCTSNode``. On every turn, after the
     next action has been chosen but before it is applied to the board, two
     ``[features, policy, color]`` entries are stored for the side to move:
     one plain and one requested with ``data_augmentation=True``.

     Once ``game.is_finished(self.iter_max)`` reports true, the collected
     entries are handed to ``set_winner`` together with ``game.get_winner()``.

     Returns:
         The list of recorded examples.
     """
     samples = []
     start_board = get_random_board()
     shared_root = MCTSNode(is_initial=True)

     def make_player(color):
         # The two players differ only in the color they play.
         return NNPlayer(color,
                         n_simulations=self.n_simulations,
                         current_node=shared_root,
                         janggi_net=self.predictor,
                         temperature_start=1,
                         temperature_threshold=30,
                         temperature_end=0.01)

     blue = make_player(Color.BLUE)
     red = make_player(Color.RED)
     game = Game(blue, red, start_board)

     # Extra keyword arguments for the two variants of each example.
     plain = {}
     augmented = {"data_augmentation": True}

     while not game.is_finished(self.iter_max):
         move = game.get_next_action()
         game.actions.append(move)

         # Blue stores the plain example first; red stores the augmented
         # one first.
         if game.current_player == Color.BLUE:
             mover, label, variants = blue, Color.BLUE, (plain, augmented)
         else:
             mover, label, variants = red, Color.RED, (augmented, plain)

         for extra in variants:
             samples.append([
                 start_board.get_features(game.current_player,
                                          game.round,
                                          **extra),
                 mover.current_node.get_policy(game.current_player,
                                               **extra),
                 label,
             ])

         game.board.apply_action(move)
         game.switch_player()
         # Invalidate the action cache to try to keep memory usage down.
         game.board.invalidate_action_cache(move)
         game.round += 1

     set_winner(samples, game.get_winner())
     return samples
Пример #2
0
 def test_random(self):
     """Run a game between two ``RandomPlayer`` sides and print the result.

     The game is played on a fresh ``Board`` via ``run_game(200)``. The
     board's ``repr``, its ``str`` form and the value of ``get_winner()``
     are printed afterwards; nothing is asserted.
     """
     blue = RandomPlayer(Color.BLUE)
     red = RandomPlayer(Color.RED)
     fresh_board = Board()
     random_game = Game(blue, red, fresh_board)
     random_game.run_game(200)
     # Show the board in both its repr and its str rendering.
     for render in (repr, str):
         print(render(fresh_board))
     print(random_game.get_winner())