                              PLAYER)
        self.ai_opponent = MCTS(None, P2, 128, OPPONENT)
        self.current_user = None

    def select_action(self, state):
        # route the move to whichever MCTS agent holds the current turn
        if self.current_user == PLAYER:
            self.ai_player.current_user = PLAYER
            action = self.ai_player.simulation(state)
        else:
            self.ai_opponent.current_user = OPPONENT
            action = self.ai_opponent.simulation(state)
        return action


if __name__ == '__main__':
    env = tictactoe_env.TicTacToeEnv()
    manager = AiVsAi()
    result = {-1: 0, 0: 0, 1: 0}

    for game in range(GAMES):
        print('########## Game: {} ##########\n'.format(game + 1))
        # alternate which color moves first every game
        player_color = (MARK_O + game) % 2
        state = env.reset(player_color=player_color)
        done = False
        step_play = 0

        while not done:
            # whoever holds mark O moves first; turns alternate with step parity
            current_user = ((PLAYER if player_color == MARK_O else OPPONENT)
                            + step_play) % 2
            print('- BOARD -')
            # single printable view: PLAYER cells as 1, OPPONENT cells as 2
            print(env.board[PLAYER] + env.board[OPPONENT] * 2)
        # store the search root and its visit-count policy as training data
        state_memory.appendleft(self.root)
        pi_memory.appendleft(pi.flatten())
        return tuple(action)


if __name__ == '__main__':
    start = time.time()
    train_dataset_store = []

    # replay buffers: board states, policy targets (pi), and game outcomes (z)
    state_memory = deque(maxlen=102400)
    pi_memory = deque(maxlen=102400)
    z_memory = deque(maxlen=102400)

    # separate environments for the actual game and for MCTS simulations
    env_game = tictactoe_env.TicTacToeEnv()
    env_simul = tictactoe_env.TicTacToeEnv()

    result_game = {-1: 0, 0: 0, 1: 0}
    win_mark_o = 0
    step_game = 0
    step_total_simul = 0

    print("=" * 30, " Game Start ", "=" * 30, '\n')
    for game in range(GAMES):
        # alternate which color moves first every game
        player_color = (MARK_O + game) % 2
        state_game = env_game.reset(player_color=player_color)
        mcts = MCTS()
        done_game = False
        step_play = 0