def run_episode(self):
    """Play one self-play game and collect training examples.

    Two NNPlayers share the same MCTS root so search statistics are reused.
    For every move, two training rows are appended: one normal and one
    data-augmented, each of the form [features, policy, mover_color].
    Returns the list of examples with the winner applied by ``set_winner``.
    """
    examples = []
    board = get_random_board()
    initial_node = MCTSNode(is_initial=True)
    players = {
        color: NNPlayer(color,
                        n_simulations=self.n_simulations,
                        current_node=initial_node,
                        janggi_net=self.predictor,
                        temperature_start=1,
                        temperature_threshold=30,
                        temperature_end=0.01)
        for color in (Color.BLUE, Color.RED)
    }
    game = Game(players[Color.BLUE], players[Color.RED], board)

    def record(augmented):
        # One training row for the current mover. The non-augmented call
        # deliberately omits the keyword, exactly as the original did, in
        # case the helper's default differs from an explicit False.
        player = players[game.current_player]
        if augmented:
            features = board.get_features(game.current_player, game.round,
                                          data_augmentation=True)
            policy = player.current_node.get_policy(game.current_player,
                                                    data_augmentation=True)
        else:
            features = board.get_features(game.current_player, game.round)
            policy = player.current_node.get_policy(game.current_player)
        examples.append([features, policy, game.current_player])

    while not game.is_finished(self.iter_max):
        new_action = game.get_next_action()
        game.actions.append(new_action)
        # NOTE(review): the original appended (normal, augmented) for BLUE
        # but (augmented, normal) for RED; that per-color order is preserved.
        if game.current_player == Color.BLUE:
            record(augmented=False)
            record(augmented=True)
        else:
            record(augmented=True)
            record(augmented=False)
        game.board.apply_action(new_action)
        game.switch_player()
        game.board.invalidate_action_cache(
            new_action)  # Try to reduce memory usage
        game.round += 1
    winner = game.get_winner()
    set_winner(examples, winner)
    return examples
def test_fight2(self):
    """Pit two freshly initialised NN players against each other for 100 rounds."""
    shared = dict(n_simulations=100,
                  temperature_start=0.01,
                  temperature_threshold=30,
                  temperature_end=0.01)
    # Each side gets its own untrained network instance.
    blue = NNPlayer(Color.BLUE, janggi_net=JanggiNetwork(), **shared)
    red = NNPlayer(Color.RED, janggi_net=JanggiNetwork(), **shared)
    fight(blue, red, 100)
def test_single_action_nn(self):
    """Smoke-test a single MCTS-backed move selection on a random board."""
    simulations = 800

    def make_player(color):
        # Fresh untrained network per side, low fixed temperature.
        return NNPlayer(color,
                        n_simulations=simulations,
                        janggi_net=JanggiNetwork(),
                        temperature_start=0.01,
                        temperature_threshold=30,
                        temperature_end=0.01)

    game = Game(make_player(Color.BLUE), make_player(Color.RED),
                get_random_board())
    game.get_next_action()
def organize_fight(self):
    """Evaluate the current network against two baseline opponents.

    Plays one match versus a purely random player, then one versus a raw
    MCTS player. A fresh NNPlayer is built for each match (as in the
    original duplicated construction) so no search-tree state carries
    over between evaluations.
    """
    def _fresh_blue():
        # New player per fight: avoids reusing MCTS statistics across games.
        return NNPlayer(Color.BLUE,
                        n_simulations=self.n_simulations,
                        janggi_net=self.predictor,
                        temperature_start=0.01,
                        temperature_threshold=30,
                        temperature_end=0.01)

    # Baseline 1: uniformly random opponent.
    fight(_fresh_blue(), RandomPlayer(Color.RED), self.iter_max)

    # Baseline 2: plain MCTS without a neural network.
    mcts_opponent = RandomMCTSPlayer(
        Color.RED,
        n_simulations=self.n_simulations_opponent,
        temperature_start=0.01,
        temperature_threshold=30,
        temperature_end=0.01)
    fight(_fresh_blue(), mcts_opponent, self.iter_max)
def run_episode_raw(args):
    """Worker entry point: play one self-play game and return it as JSON.

    ``args`` is a (predictor, n_simulations, iter_max) tuple so the
    function can be mapped over a process pool.
    """
    print("Starting episode", current_process().name)
    start = time.time()
    predictor, n_simulations, iter_max = args
    board = get_random_board()
    root = MCTSNode(is_initial=True)
    # Both players search from the same shared root node.
    common = dict(n_simulations=n_simulations,
                  current_node=root,
                  janggi_net=predictor,
                  temperature_start=1,
                  temperature_threshold=30,
                  temperature_end=0.01)
    blue = NNPlayer(Color.BLUE, **common)
    red = NNPlayer(Color.RED, **common)
    game = run_game(board, blue, red, iter_max)
    print("Time Episode: ", time.time() - start)
    return game.to_json(root)
def get_player(player_name, color, model_saver):
    """Build a player from its name.

    ``"random_mcts"`` yields a plain MCTS player; any other name is treated
    as a saved model index loaded through ``model_saver``.
    """
    if player_name == "random_mcts":
        return RandomMCTSPlayer(color,
                                n_simulations=800,
                                temperature_start=0.01,
                                temperature_threshold=30,
                                temperature_end=0.01)
    # Otherwise: load the named network checkpoint and wrap it in an NN player.
    predictor = JanggiNetwork()
    model_saver.load_index_model(predictor, None, player_name)
    return NNPlayer(color,
                    n_simulations=400,
                    janggi_net=predictor,
                    temperature_start=0.01,
                    temperature_threshold=30,
                    temperature_end=0.01)
from ia.predictors import FilePredictor
from ia.random_mcts_player import NNPlayer, fight, RandomMCTSPlayer
from janggi.parameters import N_ITERATIONS, DEFAULT_N_SIMULATIONS
from janggi.utils import Color

# Example of command:
# python3 show_match_nn.py --number_simulations 800 --n_iterations 200 --root_file_inference /tmp/showmatch --parallel_mcts True --n_threads_mcts 10

# Exhibition match: trained network (BLUE) versus a heavy raw-MCTS baseline (RED).
player_blue = NNPlayer(
    Color.BLUE,
    n_simulations=DEFAULT_N_SIMULATIONS,
    janggi_net=FilePredictor(),
    temperature_start=0.01,
    temperature_threshold=30,
    temperature_end=0.01,
    print_info=True,
)
player_red = RandomMCTSPlayer(
    Color.RED,
    n_simulations=16000,
    temperature_start=0.01,
    temperature_threshold=30,
    temperature_end=0.01,
    print_info=True,
)
# Alternative opponent: a second NN-backed player.
# player_red = NNPlayer(Color.RED, n_simulations=DEFAULT_N_SIMULATIONS,
#                       janggi_net=FilePredictor(),
#                       temperature_start=0.01,
#                       temperature_threshold=30,
#                       temperature_end=0.01,
#                       print_info=True)

fight(player_blue, player_red, N_ITERATIONS, print_board=True)
from ia.predictors import FilePredictor
from ia.random_mcts_player import NNPlayer, fight
from janggi.human_player import HumanPlayer
from janggi.parameters import N_ITERATIONS, DEFAULT_N_SIMULATIONS
from janggi.utils import Color

# Example of command:
# python3 play_against_nn.py --number_simulations 800 --n_iterations 200 --root_file_inference /tmp/showmatch --parallel_mcts True --n_threads_mcts 10

# Interactive game: a human plays BLUE against the trained network as RED.
player_blue = HumanPlayer(Color.BLUE)
player_red = NNPlayer(
    Color.RED,
    n_simulations=DEFAULT_N_SIMULATIONS,
    janggi_net=FilePredictor(),
    temperature_start=0.001,
    temperature_threshold=30,
    temperature_end=0.001,
    think_when_other=True,  # keep searching while the human is thinking
    print_info=True,
)

fight(player_blue, player_red, N_ITERATIONS, print_board=True)
def continuous_learning_once(self):
    """One iteration of the continuous-learning loop: train, then gate the
    new weights behind an evaluation match against the previous model.

    The candidate model is saved only if it wins more than
    VICTORY_THRESHOLD percent of N_FIGHTS games against the old model.
    """
    # First, train
    for _ in range(EPOCH_NUMBER_CONTINUOUS):
        training_set = []
        for example in _raw_to_examples(
                self.model_saver.all_episodes_raw_iterators(),
                PROP_POPULATION_FOR_LEARNING):
            training_set.append(example)
            if len(training_set) > N_LAST_GAME_TO_CONSIDER:
                # Cap the batch: either stop collecting entirely, or (when
                # TRAIN_ON_ALL) train on this chunk and keep streaming.
                if not TRAIN_ON_ALL:
                    break
                self.train(training_set)
                training_set = []
        # Train on whatever remains after the inner loop ends or breaks.
        self.train(training_set)
    # Then, fight!
    # old_model = copy.deepcopy(self.predictor)
    self.model_saver.load_latest_model(self.old_model, None)
    self.old_model.to(DEVICE)
    victories = 0
    print("Start the fights!")
    for i in range(N_FIGHTS):
        # Alternate colors: candidate plays BLUE for the first half of the
        # fights and RED for the second half.
        if i < N_FIGHTS / 2:
            print("I am BLUE")
            new_player = NNPlayer(Color.BLUE,
                                  n_simulations=self.n_simulations,
                                  janggi_net=self.predictor,
                                  temperature_start=0.01,
                                  temperature_threshold=30,
                                  temperature_end=0.01)
            old_player = NNPlayer(Color.RED,
                                  n_simulations=self.n_simulations,
                                  janggi_net=self.old_model,
                                  temperature_start=0.01,
                                  temperature_threshold=30,
                                  temperature_end=0.01)
            winner = fight(new_player, old_player, self.iter_max)
            if winner == Color.BLUE:
                victories += 1
        else:
            print("I am RED")
            new_player = NNPlayer(Color.RED,
                                  n_simulations=self.n_simulations,
                                  janggi_net=self.predictor,
                                  temperature_start=0.01,
                                  temperature_threshold=30,
                                  temperature_end=0.01)
            old_player = NNPlayer(Color.BLUE,
                                  n_simulations=self.n_simulations,
                                  janggi_net=self.old_model,
                                  temperature_start=0.01,
                                  temperature_threshold=30,
                                  temperature_end=0.01)
            winner = fight(old_player, new_player, self.iter_max)
            if winner == Color.RED:
                victories += 1
        # Early abort: even if the candidate won every remaining fight
        # (victories + N_FIGHTS - i - 1 wins total), it could no longer
        # reach the victory threshold.
        if (victories + N_FIGHTS - i - 1) / N_FIGHTS * 100 < VICTORY_THRESHOLD:
            # There is no more hope...
            break
    victory_percentage = victories / N_FIGHTS * 100
    if victory_percentage > VICTORY_THRESHOLD:
        # Replace model
        print("The model was good enough", victory_percentage)
        self.model_saver.save_weights(self.predictor, optimizer=self.optimizer)
    else:
        # We do not save the model
        print("The model was not good enough", victory_percentage)