def act(self, state):
    self.epsilon *= self.epsilon_decay
    self.epsilon = max(self.epsilon_min, self.epsilon)
    # return a simulated or random move
    if np.random.random() < self.epsilon:
        # run a MCTS move if at min epsilon
        if self.epsilon == self.epsilon_min:
            # create a new node based on the board
            test_node = Node(self.board, 1, 9, None)
            # expand the tree while I have time
            time_start = time.time()
            expand_node(test_node, time_start, self.max_time, self.max_sims)
            action = test_node.get_best_move()
        # otherwise return a random move
        else:
            action = random.getrandbits(2)
            while not self.board.is_valid_move(action):
                action = (action + 1) % 4
    else:
        output_array = self.model.predict(state)[0]
        action = np.argmax(output_array)
        # if the action is invalid choose the next best
        while not self.board.is_valid_move(action):
            output_array[action] = -999999999999999
            action = np.argmax(output_array)
    return action
def mcts(self, board, player, iteration_count=100, mixing_hyperparameter=0.5):
    root = Node((-1, -1), 1, player, board)
    for it in range(0, iteration_count):
        history = set()
        leaf = root.traverse_to_leaf(history)
        leaf.expand(self)
        leaf_values = self.evaluate_turn_value(leaf.board, leaf.player)
        player_values = {1: leaf_values[1], -1: leaf_values[2]}
        rollout_result = leaf.play_rollout(self)
        for node in history:
            node.visit_count = node.visit_count + 1
            node.value_sum = node.value_sum + (
                player_values[node.player] * mixing_hyperparameter +
                rollout_result * node.player * (1 - mixing_hyperparameter))
    move = None
    max_visit_count = 0
    for child_node in root.children:
        if child_node.visit_count > max_visit_count:
            max_visit_count = child_node.visit_count
            move = child_node.move
    return move
def get_NextLegalCommandNode(self, bruteForce=False):
    # only return a Node that holds a legal command
    argmaxOfSoftmax = self.currentNode.get_argmaxOfSoftmax()
    array4096 = self.currentNode.get_array4096()
    color = self.currentNode.get_Color()
    numOfLegalMoves = self.board_stack.get_ChessBoard().legal_moves.count()
    numOfChild = self.currentNode.get_LengthOfChild()
    finalIndex = self.currentNode.get_FinalChildIndex()
    # need to check the condition for when to stop
    if bruteForce:
        repeatNum = 4096
    else:
        repeatNum = numOfLegalMoves - numOfChild
    for i in range(repeatNum):
        index = argmaxOfSoftmax[(finalIndex + 1 + i) % 4096]
        command = self.ohe.indexToMove4096(index)
        tmpCommand = chess.Move.from_uci(command)
        if self.thresholdOfPolicyNetwork > array4096[index] and not bruteForce:
            # if the prior is smaller than the policy network threshold, do not return it
            break
        if (tmpCommand in self.board_stack.get_ChessBoard().legal_moves) and not (
                self.currentNode.is_SameCommandInChild(command)):
            return index, Node.Node(self.currentNode, command, array4096[index], color)
        else:
            # retry the move as a queen promotion
            tmpCommand = chess.Move.from_uci(command + "q")
            if (tmpCommand in self.board_stack.get_ChessBoard().legal_moves) and not (
                    self.currentNode.is_SameCommandInChild(command)):
                command = command + "q"
                return index, Node.Node(self.currentNode, command, array4096[index], color)
    # can't make child anymore
    return None, None
def build_mcts(self, state):
    """Build the MCTS for the given state, or re-root an existing tree."""
    lg.logger_player.info("BUILDING MCTS")
    if self.mcts is None:
        if self.turn == 1:
            self.mcts = self.load_history(state)
            if self.color == -1:
                self.mcts.new_root(Node(state))
                self.mcts.swap_values()
        if self.mcts is None:
            # may still be None if state does not exist in history
            self.mcts = MCTS(self.color, Node(state), self.c_puct)
        win_action = None
    else:
        win_action = self.mcts.new_root(Node(state))
    return win_action
def generate_game(self, model: Polvalnet_fc):
    np.random.seed()
    triplets = []
    step_game = 0
    temperature = 1
    game_over = False
    moves = 0
    env = oz_env()
    env.reset()
    root_node = Node(env, Config.EXPLORE_FACTOR)
    while not game_over:
        moves += 1
        step_game += 1
        if step_game == 50:
            temperature = 10e-6
        start = time.time()
        pi, successor, root_node = MCTS(temp=temperature, network=model, root=root_node)
        # print("Calculated next move in {}ms".format(time.time() - start))
        feature = root_node.env.board
        triplets.append([feature, pi])
        # print('')
        # print(root_node.env.board)
        # print("Running on {} ".format(mp.current_process()))
        root_node = successor
        game_over = root_node.env.is_game_over()
    z = root_node.env.who_won()
    for i in range(len(triplets) - step_game, len(triplets)):
        triplets[i].append(z)
    return triplets
def get_BestQuNode_Before(self):
    # Qu is Q(s,a) + u(s,a); we want the node with the largest value
    # build a new Node from the array entry that follows the current number of children
    argmaxOfSoftmax = self.currentNode.get_argmaxOfSoftmax()
    array4096 = self.currentNode.get_array4096()
    index = argmaxOfSoftmax[self.currentNode.get_LengthOfChild()]
    command = self.ohe.indexToMove4096(index)
    color = not self.currentNode.get_Color()
    newNode = Node.Node(self.currentNode, command, array4096[index], color)
    childList = self.currentNode.get_Child()
    if len(childList) == 0:
        # no children yet
        return newNode
    maxQuNode = newNode
    for node in childList:
        if node.get_Qu() > maxQuNode.get_Qu():
            maxQuNode = node
    #### is_child alone cannot prevent duplicate node creation; needs fixing
    if self.currentNode.is_child(maxQuNode):
        # if it is already a child, the newly created newNode was never used, so discard it
        del newNode
    return maxQuNode
def decide_move(self, board, verbose=False, total_moves=None):
    """
    Given the current board, return a move to play.
    :type board: Board
    :rtype: a pair of tuples, specifying the move's FROM and TO.
    """
    if verbose:
        board.visualise(cur_player=self.player_num)
        print('Facing the board above, Ai Version {} is thinking.'.format(self.model.version))
    node = Node(board, self.player_num)
    # Play deterministically when moves reach a certain number
    if total_moves is not None and total_moves > TOTAL_MOVES_TILL_TAU0:
        if self.tree_tau != DET_TREE_TAU:
            print('Player {}: changing tree tau from {} to {}'.format(
                self.player_num, self.tree_tau, DET_TREE_TAU))
            self.tree_tau = DET_TREE_TAU
    tree = MCTS(node, self.model, tree_tau=self.tree_tau)
    pi, sampled_edge = tree.search()
    if verbose:
        human_fromPos = board_utils.np_index_to_human_coord(sampled_edge.fromPos)
        human_toPos = board_utils.np_index_to_human_coord(sampled_edge.toPos)
        print('Ai Version {} moved from {} to {}\n'.format(
            self.model.version, human_fromPos, human_toPos))
    return sampled_edge.fromPos, sampled_edge.toPos
def getColumn(self, board):
    # t0 = time.time()
    depth = 0 if self.plays_first else 1
    node = Node(board=board, depth=depth)
    Agent().train_mcts_ntimes(node, 10, verbose=True)
    best_move = argmax([child.wins for child in node.children])
    print("#" * 50)
    # warning(time.time() - t0)
    return best_move
def main():
    network = load_model(args.newnetwork)
    score_net = 0
    score_random = 0
    for game in range(args.numgames):
        moves = 0
        temperature = 10e-6
        env = oz_env()
        env.reset()
        root_node = Node(env, Config.EXPLORE_FACTOR)
        game_over = False
        # print(root_node.env.board[71])
        while not game_over:
            start = time.time()
            if root_node.env.board[71] == -1:
                # print("am here")
                pi, successor, root_node = MCTS(temp=temperature, network=network, root=root_node)
                root_node = successor
            else:
                if root_node.children is None:
                    root_node.children = [None] * len(root_node.legal_moves)
                move = np.random.randint(0, len(root_node.legal_moves))
                if root_node.children[move] is None:
                    next_env = deepcopy(root_node.env)
                    next_env.step(root_node.legal_moves[move])
                    root_node.children[move] = Node(next_env, temperature, parent=root_node, child_id=move)
                root_node = root_node.children[move]
            moves = moves + 1
            game_over = root_node.env.is_game_over()
        z = root_node.env.who_won()
        if z <= -1:
            score_net += 1
        else:
            score_random += 1
        print("Game {} complete. Net: {} Random: {}".format(game, score_net, score_random))
    print("New network score total wins: {} Average Score: {}".format(score_net, score_net / args.numgames))
    print("Random play score total wins: {} Average Score: {}".format(score_random, score_random / args.numgames))
def selfPlay():
    env = QE()
    env.reset()
    tree = Tree(Node(env))
    while tree.rootNode.state.winner is None:
        print('Here')
        for i in range(10):
            tree.search()
        pi = tree.play()
    print(tree.rootNode.state.winner)
def action(self, board):
    """ Performs an action """
    states = board.all_possible_next_states(board.Player_turn())
    nodes = [Node(i) for i in states]
    values = []
    converted_state = self.convert_nodes_to_input(nodes)
    for state in converted_state:
        values.append(self.nn.forward(state).item())
    # Player_turn is a method, so it must be called when comparing against "A"
    if board.Player_turn() == "A":
        return states[np.argmax(values)]
    else:
        return states[np.argmin(values)]
def evaluateLeaf(self, leaf, value):
    if value == 0:
        value, probs, allowedActions = self.getPredictions(leaf.state)
        probs = probs[allowedActions]
        for idx, action in enumerate(allowedActions):
            newState = leaf.state.takeAction(action)
            if newState.toString() not in self.mcts.tree:
                node = Node(newState)
                self.mcts.addNode(node)
            else:
                node = self.mcts.tree[newState.toString()]
            newEdge = Edge(leaf, node, probs[idx], action)
            leaf.edges.append(newEdge)
    return value
def run(numThreads, numSimulations, modelName):
    # model = load_model('./models/' + modelName + '.h5', custom_objects={'softmax_cross_entropy_with_logits': softmax_cross_entropy_with_logits})
    env = QE()
    env.reset()
    tree = Tree(Node(env))
    gameStates = []
    players = []
    pis = []
    envs = []
    testBool = True
    # while tree.rootNode.state.winner == None:
    while testBool:
        testBool = False
        gameStates.append(tree.rootNode.state.gameState)
        players.append(
            1 * (tree.rootNode.state.playerA == tree.rootNode.state.currPlayer) +
            -1 * (tree.rootNode.state.playerB == tree.rootNode.state.currPlayer))
        envs.append(tree.rootNode.state)
        pi = tree.play()
        pis.append(pi)
    winner = tree.rootNode.state.winner
    for i in range(len(players)):
        gameState = gameStates[i]
        pi = pis[i]
        player = players[i]
        env = envs[i]
        if winner == 0:
            value = 0
        elif winner == player:
            value = 1
        elif winner == -player:
            value = -1
        else:
            raise Exception("Unrecognized Winner")
        savedState = SavedState(gameState, pi, value, env)
        savedPath = "./positions/" + modelName + "-" + datetime.now().strftime(
            "%d-%b-%Y-%H-%M-%S-%f")
        writeSavedState(savedState, savedPath)
    return winner
def make_random_move(root):
    '''
    Independent of MCTS. Instead, sample a random move from the current board's valid moves.
    '''
    random.seed()
    cur_state = root.state
    player = root.currPlayer
    valid_actions = cur_state.get_valid_moves(
        player)  # dict, key: checker pos, value: possible dests from pos
    random_start = random.choice(list(valid_actions.keys()))
    while len(valid_actions[random_start]) == 0:
        random_start = random.choice(list(valid_actions.keys()))
    random_end = random.choice(valid_actions[random_start])
    next_state = copy.deepcopy(cur_state)
    next_state.place(player, random_start, random_end)
    new_player = PLAYER_ONE + PLAYER_TWO - player
    return Node(next_state, new_player)
def run_tournament(self, candidate, candidate_alpha_scores, incumbent_alpha_scores, _):
    moves = 0
    temperature = 10e-6
    p = np.random.binomial(1, 0.5) == 1
    white, black = (self.current_policy, candidate) if p else (candidate, self.current_policy)
    env = oz_env()
    env.reset()
    root_node = Node(env, Config.EXPLORE_FACTOR)
    game_over = False
    while not game_over:
        if root_node.env.white_to_move:
            player = white
        else:
            player = black
        pi, successor, root_node = MCTS(temp=temperature, network=player, root=root_node)
        root_node = successor
        moves += 1
        game_over = root_node.env.is_game_over()
    z = root_node.env.who_won()
    # from white's perspective
    if white == candidate:
        candidate_alpha_scores.append(+z)
        incumbent_alpha_scores.append(-z)
        print("Candidate won!")
    else:
        candidate_alpha_scores.append(-z)
        incumbent_alpha_scores.append(+z)
        print("Incumbent won!")
def buildMCTS(self, state):
    self.root = Node(state)
    # reuse the same node as the tree root so self.root stays attached to the search tree
    self.mcts = MCTS(self.root)
def set_RootNode(self):
    self.root_Node = Node.Node(None, None, self.board_stack.get_Color())  # create the root node
    self.currentNode = self.root_Node  # set currentNode to the root node when it is created
from QuorridorEnvironment import QuorridorEnvironment as QE
from MCTS import Tree, Node
import Thread
import time

start = time.time()
env = QE()
env.reset()
rootNode = Node(env)
Thread.search(8, 32, rootNode)
end = time.time()
print('Total Time: ' + str(end - start))
def initialize_mcts(self):
    self.state.PlayerA.place_workers(self.state)
    self.state.PlayerB.place_workers(self.state)
    root = Node(self.state)
    self.mcts = MCTS(root, self.nn, self.args)
def selfplay(model1, model2=None, randomised=False):
    '''
    Generate one agent self-play game given two models.
    TODO: if `randomised`, randomise the starting board state
    '''
    if model2 is None:
        model2 = model1
    player_progresses = [0, 0]
    player_turn = 0
    num_useless_moves = 0
    play_history = []
    tree_tau = TREE_TAU
    board = Board(randomised=randomised)
    root = Node(board, PLAYER_ONE)  # initial game state
    use_model1 = True
    while True:
        model = model1 if use_model1 else model2
        if len(root.state.hist_moves) < INITIAL_RANDOM_MOVES:
            root = make_random_move(root)
        else:
            # Use the current model to make a move
            root = make_move(root, model, tree_tau, play_history)
        assert root.isLeaf()
        hist_moves = root.state.hist_moves
        cur_player_hist_moves = [
            hist_moves[i] for i in range(len(hist_moves) - 1, -1, -2)
        ]
        history_dests = set([move[1] for move in cur_player_hist_moves])
        # If only a few distinct destinations appear in the past moves, there is some kind of repetition
        if len(cur_player_hist_moves) * 2 >= TOTAL_HIST_MOVES and len(
                history_dests) <= UNIQUE_DEST_LIMIT:
            print('Repetition detected: stopping and discarding game')
            return None, None
        # Evaluate player progress for stopping
        progress_evaluated = root.state.player_progress(player_turn + 1)
        if progress_evaluated > player_progresses[player_turn]:
            num_useless_moves = int(num_useless_moves * (NUM_CHECKERS - 1) / NUM_CHECKERS)
            player_progresses[player_turn] = progress_evaluated
        else:
            num_useless_moves += 1
        # Change player
        player_turn = 1 - player_turn
        use_model1 = not use_model1
        # Change TREE_TAU to very small once the game has made certain progress so actions are deterministic
        if len(play_history) + INITIAL_RANDOM_MOVES > TOTAL_MOVES_TILL_TAU0:
            if tree_tau == TREE_TAU:
                print(
                    'selfplay: Changing tree_tau to {} as total number of moves is now {}'
                    .format(DET_TREE_TAU, len(play_history)))
            tree_tau = DET_TREE_TAU
        if root.state.check_win():
            print('END GAME REACHED')
            break
        # Stop (and discard) the game if it's nonsense
        if num_useless_moves >= PROGRESS_MOVE_LIMIT:
            print('Game stopped by reaching progress move limit; Game Discarded')
            return None, None
    if randomised:
        # Discard the first `BOARD_HIST_MOVES` as the history is not enough
        return play_history[BOARD_HIST_MOVES:], utils.get_p1_winloss_reward(root.state)
    else:
        return play_history, utils.get_p1_winloss_reward(root.state)
def get_move(self, game_state, det, sims):
    # if only one move is available, that one is chosen
    allowed_actions = valid_actions(game_state.array)
    if len(allowed_actions) == 1:
        return allowed_actions[0], None
    # the given game state is set as the root of the tree
    if self.mcts is None or game_state.id not in self.mcts.tree:
        self.mcts = MCTS(Node(game_state))
    else:
        self.mcts.root = self.mcts.tree[game_state.id]
    # simulate a number of games starting from the current game state to fill the Monte Carlo search tree
    for i in range(sims):
        leaf, chosen_path, new_game_state = self.mcts.simulate_game()
        # check whether the game finished after the simulation or the end of the tree was reached
        if new_game_state is None or check_for_winner(new_game_state.array) is None:
            # if the game is not finished, the model is used to evaluate the game state
            value, probabilities, allowed_actions = self.get_predictions(leaf.game_state)
            # the model also provides a probability distribution over the best move to take in this situation
            probabilities = probabilities[allowed_actions]
            # new edges and nodes are created at the leaf to expand the tree
            for idx, action in enumerate(allowed_actions):
                new_game_state = leaf.game_state.take_action(action)
                if new_game_state.id not in self.mcts.tree:
                    node = Node(new_game_state)
                    self.mcts.add_node(node)
                else:
                    node = self.mcts.tree[new_game_state.id]
                new_edge = Edge(leaf, node, probabilities[idx], action)
                leaf.edges.append((action, new_edge))
        else:
            # if the game is finished, the model is not needed because the value of the game state is the result
            value = -1
            if check_for_winner(new_game_state.array) == 0:
                value = 0
        # after the value of the game state is calculated, the chosen path of the search tree is updated
        self.mcts.back_propagation(leaf, chosen_path, value)
    q = np.zeros(42, dtype=np.float32)
    # use a concrete integer dtype (np.integer is abstract and deprecated as a dtype)
    n = np.zeros(42, dtype=np.int32)
    # choosing the best move after the simulations
    for action, edge in self.mcts.root.edges:
        q[action] = edge.Q
        n[action] = edge.N
    n = n / (np.sum(n) * 1.0)
    # the values are normalized into a scale of 0 to 1
    allowed_actions = valid_actions(game_state.array)
    normalized = np.zeros(42, dtype=np.float64)
    for index in allowed_actions:
        normalized[index] = (q[index] - min(q)) / (max(q) - min(q))
    normalized = normalized / np.sum(normalized)
    # the selection can rarely lead to an error because of a prior rounding error
    try:
        # either the best move is chosen or a random one, depending on whether the deterministic flag is set
        if det:
            # one of the moves with the highest value is chosen
            actions = np.argwhere(normalized == max(normalized))
            action = random.choice(actions)[0]
        else:
            # semi-randomly select a move - the higher the value, the more likely it is to be chosen
            normalized[allowed_actions[-1]] = normalized[allowed_actions[-1]] + (1 - np.sum(normalized))
            action_idx = np.random.multinomial(1, normalized)
            action = np.where(action_idx == 1)[0][0]
    except (ValueError, IndexError):
        # if the error occurs, a random allowed move is simply chosen instead
        action = random.choice(allowed_actions)
    return action, n
def main():
    # n, num_games, verbose, starting_player, max_rollouts = setup_game()
    n, num_games, verbose, starting_player, max_rollouts = 5, 200, False, 1, 0.5
    results = []
    game_num = 1
    viewer = None
    run_tournament = True
    with_training = True
    num_games_tournament = 25
    if run_tournament:
        save_path = "short_topp"
    else:
        save_path = "long_topp"
    ##### CONFIG #####
    buffer_size = 40
    train_interval = 40
    saving_interval = 10
    moves_done = 0
    epochs = 300
    ##################
    buffer = ReplayBuffer(vfrac=0.1, tfrac=0.1, size=buffer_size)
    anet = init_anet(n, buffer)
    if with_training:
        anet.save_to_file(save_path + "/model_step_{0}.h5".format(0))
    game = Hex(n, starting_player)
    ROOT_NODE = Node(game=game)
    while with_training and num_games >= game_num:
        game = Hex(n, starting_player)
        next_root = ROOT_NODE
        # viewer = Board(game)
        print("Game number {}".format(game_num))
        while game.get_moves():
            mc = MonteCarlo(game, max_rollouts, next_root)
            mc.run(lambda _input: ANET.predict(_input, model=anet.model))
            case = mc.get_training_case()
            buffer.push(case)
            next_root = mc.get_best_move()
            game.do_move(next_root.move)
            moves_done += 1
            if viewer:
                viewer.do_move(next_root.move, game.player)
            if moves_done % train_interval == 0:
                buffer.update()
                anet.train_model(epochs)
                anet.run_against_random(num_games=50, game_num=game_num)
                if saving_interval > 0 and game_num % saving_interval == 0:
                    anet.save_to_file(save_path + "/model_step_{0}.h5".format(game_num))
                buffer.size += 20
                # train_interval += 5
                # anet.optimizer.lr /= 2
        if game.get_result(game.player) == 1:
            results.append(game.player)
        game_num += 1
        if viewer:
            viewer.persist()
    if run_tournament:
        tournament = Tournament(num_games_tournament)
        tournament.run_tournament(save_path)
    else:
        anet.save_to_file("best_topp/model_2.h5")
def AI_vs_AI():
    flag = True
    while flag:
        playerLetter = random.choice(('X', 'O'))
        computerLetter = 'O' if playerLetter == 'X' else 'X'
        turn = whoGoesFirst()
        theBoard = [' '] * 10
        mcts = MCTS(2, playrandom, get_possible_next_states)
        first_letter = playerLetter if turn == 'player' else computerLetter
        for player in mcts.player_list:
            if player.nr == 0:
                player.id = first_letter
            else:
                player.id = playerLetter if first_letter == computerLetter else computerLetter
        mcts.root = Node(State(True, theBoard), mcts.player_list[0])
        gameIsPlaying = True
        while gameIsPlaying:
            if turn == 'player':
                print('\n')
                drawBoard(theBoard)
                print('\n')
                mcts.root = mcts.find_next_move()
                # choosen_next_state = mcts.find_next_move(tree, tree.root.state.infolist[0])
                # make the move that was chosen by the mcts algorithm
                for i, entry in enumerate(theBoard):
                    if entry != mcts.root.state.board[i]:
                        makeMove(theBoard, playerLetter, i)
                        break
                if isWinner(theBoard, playerLetter):
                    drawBoard(theBoard)
                    print(playerLetter, ' won the game!')
                    gameIsPlaying = False
                else:
                    if isBoardFull(theBoard):
                        drawBoard(theBoard)
                        print('The game is a tie!')
                        break
                    else:
                        turn = 'computer'
                        input()
            else:
                print('\n')
                # print('\n')
                drawBoard(theBoard)
                print('\n')
                mcts.root = mcts.find_next_move()
                # choosen_next_state = mcts.find_next_move(tree, tree.root.state.infolist[0])
                # make the move that was chosen by the mcts algorithm
                for i, entry in enumerate(theBoard):
                    if entry != mcts.root.state.board[i]:
                        makeMove(theBoard, computerLetter, i)
                        break
                if isWinner(theBoard, computerLetter):
                    drawBoard(theBoard)
                    print(computerLetter, ' won the game!')
                    gameIsPlaying = False
                else:
                    if isBoardFull(theBoard):
                        drawBoard(theBoard)
                        print('The game is a tie!')
                        break
                    else:
                        turn = 'player'
                        input()
        cont = input('another game?\n')
        if cont not in ['y', 'yes', 'ye']:
            flag = False
def normal_game():
    print("\nWelcome to MonteCarlo-TicTacToe")
    playerLetter, computerLetter = inputPlayerLetter()
    turn = whoGoesFirst()
    theBoard = [' '] * 10
    mcts = MCTS(2, playrandom, get_possible_next_states)
    first_letter = playerLetter if turn == 'player' else computerLetter
    for player in mcts.player_list:
        if player.nr == 0:
            player.id = first_letter
        else:
            player.id = playerLetter if first_letter == computerLetter else computerLetter
    mcts.root = Node(State(True, theBoard), mcts.player_list[0])
    gameIsPlaying = True
    while gameIsPlaying:
        if turn == 'player':
            print('\n')
            drawBoard(theBoard)
            move = getPlayerMove(theBoard)
            makeMove(theBoard, playerLetter, move)
            status = True  # because if that is not the case, the game ends anyway in the evaluation that follows
            next_state = State(status, theBoard)
            mcts.update_root(next_state)
            if isWinner(theBoard, playerLetter):
                drawBoard(theBoard)
                print('You have won the game!')
                gameIsPlaying = False
            else:
                if isBoardFull(theBoard):
                    drawBoard(theBoard)
                    print('The game is a tie!')
                    break
                else:
                    turn = 'computer'
        else:
            print('\n')
            print('\n')
            drawBoard(theBoard)
            mcts.root = mcts.find_next_move()
            # choosen_next_state = mcts.find_next_move(tree, tree.root.state.infolist[0])
            # make the move that was chosen by the mcts algorithm
            for i, entry in enumerate(theBoard):
                if entry != mcts.root.state.board[i]:
                    makeMove(theBoard, computerLetter, i)
                    break
            if isWinner(theBoard, computerLetter):
                drawBoard(theBoard)
                print('The computer has beaten you!')
                gameIsPlaying = False
            else:
                if isBoardFull(theBoard):
                    drawBoard(theBoard)
                    print('The game is a tie!')
                    break
                else:
                    turn = 'player'
def main():
    # print("check-2")
    network = load_model(args.newnetwork)
    # print("am here")
    score_net = 0
    score_random = 0
    for game in range(args.numgames):
        moves = 0
        temperature = 10e-6
        white = 1
        black = None
        env = oz_env()
        env.reset()
        root_node = Node(env, Config.EXPLORE_FACTOR)
        game_over = False
        while not game_over:
            if root_node.env.board[71] == 1:
                player = white
            else:
                player = black
            # print(root_node.env.board)
            start = time.time()
            if player == white:
                # print(root_node.env.board)
                print("am here-1\n")
                pi, successor, root_node = MCTS(temp=temperature, network=network, root=root_node)
                # print(root_node.env.board)
                print("MCTS completed move {} in: {}".format(moves, time.time() - start))
                root_node = successor
            else:
                print("am here-2\n")
                if root_node.children is None:
                    root_node.children = [None] * len(root_node.legal_moves)
                move = np.random.randint(0, len(root_node.legal_moves))
                if root_node.children[move] is None:
                    # next_env = root_node.env.deepcopy()
                    next_env = deepcopy(root_node.env)
                    # next_env = root_node.env
                    next_env.step(root_node.legal_moves[move])
                    root_node.children[move] = Node(next_env, temperature, parent=root_node, child_id=move)
                root_node = root_node.children[move]
                print(root_node.env.board)
            moves = moves + 1
            game_over = root_node.env.is_game_over()
        z = root_node.env.who_won()
        # from white's perspective
        # if white == player:
        if z >= 1:
            score_net += 1
        else:
            score_random += 1
        # else:
        #     if z <= -1:
        #         score_net += 1
        #     else:
        #         score_random += 1
        print("Game {} complete. Net: {} Random: {}".format(game, score_net, score_random))
    print("New network score total wins: {} Average Score: {}".format(score_net, score_net / args.numgames))
    print("Random play score total wins: {} Average Score: {}".format(score_random, score_random / args.numgames))