def fight_agent(best_model: str,
                current_model: str,
                ae,
                round_fight=AlphaZeroConfig.ROUND_ARENA,
                max_turn=AlphaZeroConfig.MAX_TURN_ARENA,
                max_simulation=AlphaZeroConfig.MAX_SIMULATION_ARENA):
    """
    Pit two agents against each other to check which one is the best.

    :param best_model: the file path of the current best model
    :param current_model: the file path of the current model
    :param ae: the Action Encoder
    :param round_fight: the number of rounds used to determine the winner
    :param max_turn: the maximum number of turns in a game. If the current turn
        exceeds it, the game is cut off and the outcome is a draw.
    :param max_simulation: the maximum number of MCTS simulations
    :return: dict, the number of round wins per player (1: best model, 0: current model)
    """
    from ai_modules.reinforcement_algorithm import AlphaZeroAgent
    loss_win = {0: 0, 1: 0}
    for round_number in range(round_fight):
        print("ROUND {}".format(round_number + 1))
        terminal = False
        count_turn = 1
        state = State()
        state.initial_state()
        # The best model plays as player 1; the current model plays as player 0
        # and is instantiated lazily on its first turn.
        best_model_agent = AlphaZeroAgent(state, max_simulation, best_model)
        current_model_agent = None
        while not terminal and count_turn <= max_turn:
            print("=======TURN {} ========".format(count_turn))
            state.print_board()
            current_player_turn = state.get_player_turn()
            if current_player_turn == 1:
                key, dict_key = best_model_agent.choose_action(state)
                state = AIElements.result_function(state, dict_key)
                if current_model_agent is not None:
                    current_model_agent.enemy_turn_action(key, state)
            else:
                if current_model_agent is None:
                    current_model_agent = AlphaZeroAgent(state, max_simulation,
                                                         current_model)
                key, dict_key = current_model_agent.choose_action(state)
                state = AIElements.result_function(state, dict_key)
                best_model_agent.enemy_turn_action(key, state)
            print("Player %d choose action %s" % (current_player_turn, key))
            if state.is_terminal():
                # The player who has just moved wins the round.
                print("Player {} Win".format(current_player_turn))
                loss_win[current_player_turn] += 1
                terminal = True
            count_turn += 1
        if not terminal:
            # The turn limit was reached without a winner.
            print("ROUND {} DRAW".format(round_number + 1))
    return loss_win
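

# For illustration, a minimal sketch of running the arena on two saved
# checkpoints. The ActionEncoder import path and the .hdf5 file names are
# assumptions for this sketch, not something fight_agent itself prescribes.
def _arena_example():
    from ai_modules.reinforcement_algorithm import ActionEncoder  # hypothetical path

    ae = ActionEncoder()
    score = fight_agent(best_model="best_model.hdf5",
                        current_model="checkpoint.hdf5",
                        ae=ae,
                        round_fight=10)
    # fight_agent maps player 1 to the best model and player 0 to the current one.
    print("Best model wins: %d, current model wins: %d" % (score[1], score[0]))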
class GameController:
    """
    A class used to connect the view and the model.
    """

    def __init__(self):
        self.state = State()
        self.state.initial_state()
        self.possible_action_keys = []
        self.two_players = False
        self.player_vs_ai_white = False

    def play_with_two_players_start(self):
        """
        Start a two-player game and return the initial state.

        Returns
        -------
        dict
            Dict of the possible actions and the state
        """
        self.state = State()
        self.state.initial_state()
        self.two_players = True
        state_dict = AIElements.get_state_dict(self.state)
        possible_action = AIElements.get_possible_action(self.state)
        self.possible_action_keys = possible_action.keys()
        return {
            "state": state_dict,
            "possible_action": possible_action,
            "task": "CHANGE_PLAYER"
        }

    def play_with_ai_white(self, ai_agent=ControllerConfig.AI_AGENT):
        """
        Start a game against the AI, where the AI plays white.

        Parameters
        ----------
        ai_agent : str
            The AI agent type: 'random', 'minimaxab', or 'azero'.

        Returns
        -------
        dict
            Dict of the possible actions and the state
        """
        self.state = State()
        self.state.initial_state()
        self.player_vs_ai_white = True
        state_dict = AIElements.get_state_dict(self.state)
        possible_action = AIElements.get_possible_action(self.state)
        self.possible_action_keys = possible_action.keys()
        if ai_agent == 'random':
            self.ai_agent = RandomAgent()
        elif ai_agent == 'minimaxab':
            self.ai_agent = MinimaxABAgent(player_color=0)
        elif ai_agent == 'azero':
            self.ai_agent = AlphaZeroAgent()
        self.old_state_reward = deepcopy(self.state)
        return {
            "state": state_dict,
            "possible_action": possible_action,
            "task": "CHANGE_PLAYER"
        }

    def receive_input_action_play(self, input_key, input_params):
        """
        Process the input from the user in the view.

        :param input_key: str, the key of the action
        :param input_params: dict, the parameters of the action
        :return: bool, whether the action is present in the possible actions
        """
        if input_key in self.possible_action_keys:
            self.state = AIElements.result_function(self.state, input_params)
            # Keep the AI agent informed of the move (only AlphaZero needs this).
            # Guarded so a two-player game, which has no AI agent, does not crash.
            if self.player_vs_ai_white:
                self.ai_agent.enemy_turn_action(input_key, input_params)
            index_player = (AIElements.get_player(self.state) + 1) % 2
            print("TURN %d" % (self.state.turn))
            print("The Evaluation of Player %d is %.2f" %
                  (index_player,
                   AIElements.evaluation_function(self.state, index_player)))
            return True
        else:
            return False

    def get_whattodo_view(self):
        """
        Give the view a dict that tells the possible actions on this turn
        and the task that the view should do.

        :return: dict
        """
        params_view_action = {}
        self.state.print_board()
        if AIElements.is_over(self.state):
            params_view_action['task'] = 'END_GAME'
            return params_view_action
        if self.two_players:
            params_view_action['task'] = 'CHANGE_PLAYER'
            params_view_action['state'] = AIElements.get_state_dict(self.state)
            possible_action = AIElements.get_possible_action(self.state)
            params_view_action['possible_action'] = possible_action
            self.possible_action_keys = possible_action.keys()
        if self.player_vs_ai_white:
            self.possible_action_keys = AIElements.get_possible_action(
                self.state).keys()
            params_view_action['task'] = 'AI_MOVE'
            ai_key_action, ai_action_params = self.ai_agent.choose_action(
                self.state)
            previous_state = deepcopy(self.state)
            self.receive_input_action_play(ai_key_action, ai_action_params)
            if AIElements.is_over(self.state):
                params_view_action['end'] = True
                params_view_action['task'] = 'END_GAME'
                return params_view_action
            print("Reward Function is %.2f" % (AIElements.reward_function(
                self.old_state_reward, self.state, 1)))  # player 1 (Black)
            self.old_state_reward = deepcopy(self.state)
            state_dict = AIElements.get_state_dict(self.state)
            previous_state_dict = AIElements.get_state_dict(previous_state)
            possible_action = AIElements.get_possible_action(self.state)
            previous_mana = AIElements.get_players_mana(previous_state)
            params_view_action['state'] = state_dict
            params_view_action["prev_state"] = previous_state_dict
            params_view_action["ai_action"] = ai_action_params
            params_view_action["prev_mana"] = previous_mana
            params_view_action["possible_action"] = possible_action
            self.possible_action_keys = possible_action.keys()
        return params_view_action
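

# For illustration, a minimal console loop driving GameController in
# human-vs-AI mode. It assumes the human moves first and that
# get_whattodo_view then plays the AI reply; the way the action key is read
# from stdin is hypothetical and depends on the real view.
def _console_play_example():
    controller = GameController()
    view_params = controller.play_with_ai_white(ai_agent='minimaxab')
    while True:
        actions = view_params.get('possible_action', {})
        human_key = input("Choose one of %s: " % list(actions.keys()))
        if not controller.receive_input_action_play(human_key,
                                                    actions.get(human_key, {})):
            print("Illegal action, try again.")
            continue
        view_params = controller.get_whattodo_view()  # plays the AI reply
        if view_params['task'] == 'END_GAME':
            print("Game over.")
            break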
def fit_train(global_list_training,
              ae,
              model_deep_net,
              episode=AlphaZeroConfig.EPISODE,
              mtcs_sims=AlphaZeroConfig.MCTS_SIMULATION,
              best_model=None,
              greedy_episode=AlphaZeroConfig.GREEDY_EPISODE):
    """
    Train the neural network on the data generated by MCTS self-play.

    :param global_list_training: the list of data used as the input of the neural network
    :param ae: Action Encoder object
    :param model_deep_net: a Keras neural network model
    :param episode: the number of game episodes
    :param mtcs_sims: the number of MCTS simulations
    :param best_model: a Keras neural network model which represents the best model
    :param greedy_episode: the number of initial episodes played greedily
    :return:
    """
    if best_model is None:
        # The training starts from scratch, so the current model is also the best one.
        best_model = clone_model(model_deep_net.model)
        best_model.set_weights(model_deep_net.model.get_weights())
        best_model.save(AlphaZeroConfig.BEST_MODEL_PATH)
    for eps in range(episode):
        print("Episode %d" % (eps))
        print("==============")
        # In the first greedy_episode episodes, force an attack or a promotion
        # if possible.
        greed_is_good = eps < greedy_episode
        state = State()
        state.initial_state()
        stacked_state = StackedState(state)
        mcts = MCTreeSearch(model_deep_net.model, 1, mtcs_sims, ae,
                            stacked_state)
        new_list_training = do_self_play_episode(stacked_state, mcts, ae,
                                                 greed_is_good)
        global_list_training.extend(new_list_training)
        # Checkpoint the training data.
        with open("global_list_training.p", "wb") as pickle_file:
            pickle.dump(global_list_training, pickle_file)
        deep_repr_state, action_proba, reward = parse_global_list_training(
            global_list_training)
        print("Fitting the model!")
        print("------------")
        model_deep_net.model.fit([deep_repr_state], [action_proba, reward],
                                 batch_size=AlphaZeroConfig.BATCH_SIZE_FIT,
                                 epochs=AlphaZeroConfig.EPOCHS_FIT)
        model_deep_net.model.save(AlphaZeroConfig.CURRENT_MODEL_PATH)
        print("------------")
        print("Arena!")
        dict_score = fight_agent(AlphaZeroConfig.BEST_MODEL_PATH,
                                 AlphaZeroConfig.CURRENT_MODEL_PATH, ae)
        from pprint import pprint
        print("Arena is done! 1: Best Model, 0: Current Model")
        pprint(dict_score)
        if dict_score[0] >= dict_score[1] * AlphaZeroConfig.WIN_DIFFERENCE_ARENA:
            # The current model wins by a wide enough margin: change the best model.
            print("Change Best Model!")
            best_model = clone_model(model_deep_net.model)
            best_model.set_weights(model_deep_net.model.get_weights())
        else:
            # Roll the current model back to the previous best model.
            print("Roll the current model back to the best model")
            model_deep_net.model = clone_model(best_model)
            model_deep_net.model.set_weights(best_model.get_weights())
            # clone_model returns an uncompiled model, so recompile it
            # before further training.
            from keras.optimizers import Adam
            model_deep_net.model.compile(
                loss=['categorical_crossentropy', 'mean_squared_error'],
                optimizer=Adam(0.001))
        best_model.save("best_model.hdf5")
        model_deep_net.model.save("checkpoint.hdf5")
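

# For illustration, a minimal sketch of kicking off training from scratch.
# PawnNetZero and ActionEncoder are placeholders for the repository's actual
# network-wrapper and action-encoder classes; the wrapper is assumed to hold
# a compiled Keras model in its .model attribute, as fit_train expects.
def _training_example():
    import os
    import pickle

    ae = ActionEncoder()        # placeholder: the project's action encoder
    deep_net = PawnNetZero()    # placeholder: wrapper with a compiled .model

    # Resume from the data checkpoint written by fit_train if one exists.
    if os.path.exists("global_list_training.p"):
        with open("global_list_training.p", "rb") as f:
            global_list_training = pickle.load(f)
    else:
        global_list_training = []

    fit_train(global_list_training, ae, deep_net)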