def think(board, state):
    """ Runs MCTS from the given state and returns the root action with the best win rate.

    Every iteration selects a node, replays the actions on the path from the
    root to rebuild the matching game state, expands and rolls out when the
    node still has untried actions, and backpropagates the point value of
    player 1.

    Args:
        board:  The game setup.
        state:  The state of the game.

    Returns:    The action to be taken.

    """
    identity_of_bot = board.current_player(state)
    root_node = MCTSNode(parent=None, parent_action=None,
                         action_list=board.legal_actions(state))

    for _ in range(num_nodes):
        # Selection always restarts from the root and the unmodified input state.
        node = traverse_nodes(root_node, board, state, identity_of_bot)

        # Gather the actions between the root and the selected node (root first).
        path = []
        cursor = node
        while cursor.parent:
            path.insert(0, cursor.parent_action)
            cursor = cursor.parent

        # Replay that path to obtain the state the selected node stands for.
        sampled_game = state
        for step_action in path:
            sampled_game = board.next_state(sampled_game, step_action)

        # A node without untried actions is scored as-is; otherwise expand,
        # advance the state by the new child's action and simulate to the end.
        if node.untried_actions:
            node = expand_leaf(node, board, sampled_game)
            sampled_game = board.next_state(sampled_game, node.parent_action)
            sampled_game = rollout(board, sampled_game)

        # The stored value is always player 1's score; the sign flip below
        # converts it to the bot's point of view when the bot is not player 1.
        won = board.points_values(sampled_game)[1]
        backpropagate(node, won)

    sign = 1 if identity_of_bot == 1 else -1
    best_winrate = -inf
    # NOTE: best_action is only bound inside the loop, so a root without
    # children leads to an UnboundLocalError at the return statement.
    for action, child in root_node.child_nodes.items():
        child_winrate = (child.wins / child.visits) * sign
        if child_winrate > best_winrate:
            best_winrate = child_winrate
            best_action = action
    return best_action
def think(board, state):
    """ Performs MCTS by sampling games and calling the appropriate functions to construct the game tree.

    Args:
        board:  The game setup.
        state:  The state of the game.

    Returns:    The action of the most frequently visited child of the root.

    """
    identity_of_bot = board.current_player(state)
    root_node = MCTSNode(parent=None, parent_action=None,
                         action_list=board.legal_actions(state))

    for step in range(num_nodes):
        # Every playthrough restarts from the input state and the root node.
        sampled_game = state
        node = root_node

        # Selection
        leaf = traverse_nodes(node, board, sampled_game, identity_of_bot)

        # Expansion
        new_leaf = expand_leaf(leaf, board, sampled_game)
        sampled_game = board.next_state(sampled_game, new_leaf.parent_action)

        # Rollout
        # NOTE(review): the return value of rollout is discarded, so the score
        # below is read from the pre-rollout state unless rollout mutates its
        # argument in place -- confirm against rollout's implementation.
        if not board.is_ended(sampled_game):
            rollout(board, sampled_game)

        # Work out who won; anything that is not a clear win stays a draw.
        score = board.points_values(sampled_game)
        winner = 'draw'
        if score is not None:
            if score[1] == 1:
                winner = 1
            elif score[2] == 1:
                winner = 2

        # Compare by value: identity (`is`) on integers only works by accident
        # of CPython's small-int caching.
        i_won = 1 if winner == identity_of_bot else 0

        # NOTE(review): the result is backed up from `leaf`, not from the newly
        # created `new_leaf` -- verify this is intended.
        backpropagate(leaf, i_won)

    # Pick the most frequently visited child of the root.
    best_child = max(root_node.child_nodes.values(),
                     key=lambda child: child.visits)
    return best_child.parent_action
def think(board, state):
    """ Performs MCTS by sampling games and calling the appropriate functions to construct the game tree.

    Args:
        board:  The game setup.
        state:  The state of the game.

    Returns:    The action of a root child with the highest wins/visits ratio;
                ties are broken at random.

    """
    identity_of_bot = board.current_player(state)
    root_node = MCTSNode(parent=None, parent_action=None,
                         action_list=board.legal_actions(state))

    for step in range(num_nodes):
        # Every playthrough restarts from the input state and the root node.
        sampled_game = state
        node = root_node

        # Traverse the tree until a leaf is reached; get the matching state.
        leaf, new_state = traverse_nodes(node, board, sampled_game, identity_of_bot)

        # Only expand when the reached state is not already a finished game.
        if not board.is_ended(new_state):
            child = expand_leaf(leaf, board, new_state)
            new_state = board.next_state(new_state, child.parent_action)
        else:
            child = leaf

        # Simulate a possible outcome and propagate it up the tree.
        won = rollout(board, new_state, identity_of_bot)
        backpropagate(child, won)

    # Start from -inf rather than 0: with a baseline of 0 no child qualifies
    # when every ratio is negative, and choice() then fails on an empty list.
    best_ratio = float('-inf')
    best_children = []
    for child in root_node.child_nodes.values():
        ratio = child.wins / float(child.visits)
        if ratio > best_ratio:
            best_children = [child]
            best_ratio = ratio
        elif ratio == best_ratio:
            best_children.append(child)

    # NOTE: a root without any children still makes choice() raise IndexError.
    best_child = choice(best_children)
    return best_child.parent_action
def think(state):
    """ Performs MCTS by sampling games and calling the appropriate functions to construct the game tree.

    Args:
        state:  The state of the game.

    Returns:    The action of the root child with the most wins, or a random
                root action when no child has a positive win count.

    """
    identity_of_bot = state.player_turn
    root_node = MCTSNode(parent=None, parent_action=None,
                         action_list=state.legal_moves)

    for step in range(num_nodes):
        # Copy the game for sampling a playthrough and start at the root.
        sampled_game = state.copy()
        node = root_node

        # Traversal
        leaf, sampled_game = traverse_nodes(node, sampled_game, identity_of_bot)

        if leaf.untried_actions:
            # Expansion followed by a rollout from the new node.
            new_node, sampled_game = expand_leaf(leaf, sampled_game)
            won = rollout(sampled_game, identity_of_bot)
        else:
            # Nothing left to expand: score the reached state directly.
            new_node = leaf
            won = sampled_game.winner == identity_of_bot

        # Backpropagate
        backpropagate(new_node, won)

    best_action = None
    best_wins = 0
    for action, child in root_node.child_nodes.items():
        if child.wins > best_wins:
            best_wins = child.wins
            best_action = action

    # Prospects are bad: no child has a positive win count, pick at random.
    # `is None` avoids invoking a custom __eq__ on action objects.
    if best_action is None:
        best_action = choice(list(root_node.child_nodes.keys()))

    return best_action
def expand_leaf(node, board, state):
    """ Adds a new leaf to the tree by creating a new child node for the given node.

    Args:
        node:   The node for which a child will be added.
        board:  The game setup.
        state:  The state of the game that `node` represents.

    Returns:    The added child node.

    """
    new_move = choice(node.untried_actions)
    # Once expanded, the action must not be offered for expansion again
    # (consistent with the other expand_leaf variants in this file).
    node.untried_actions.remove(new_move)

    # The result of next_state has to be kept: the other blocks in this file
    # all use its return value as the successor state, and the child's legal
    # actions must be computed on that state rather than on the parent state.
    next_state = board.next_state(state, new_move)
    new_node = MCTSNode(parent=node, parent_action=new_move,
                        action_list=board.legal_actions(next_state))
    node.child_nodes[new_move] = new_node
    return new_node
def think(board, state):
    """ Performs MCTS by sampling games and calling the appropriate functions to construct the game tree.

    In this variant expand_leaf is used as if it returns a collection of new
    nodes: the step counter advances by the size of that collection and the
    search stops early when the collection is empty. The module-level
    `explore_faction` is adjusted after every fresh rollout.

    Args:
        board:  The game setup.
        state:  The state of the game.

    Returns:    The action of the root child with the highest wins/visits
                ratio (the root's own parent_action if no child beats -1).

    """
    identity_of_bot = board.current_player(state)
    root_node = MCTSNode(parent=None, parent_action=None, action_list=board.legal_actions(state))
    step = 0
    # explore_faction lives at module level and is modified below.
    global explore_faction
    while step < num_nodes:
        # Do MCTS - This is all you!
        leaf_node, sampled_game = traverse_nodes(root_node, board, state, identity_of_bot)
        new_nodes = expand_leaf(leaf_node, board, sampled_game)
        # Nothing could be expanded: stop searching.
        if len(new_nodes) == 0:
            break
        step += len(new_nodes)
        # Rollout results of this batch, keyed by the sorted untried actions of
        # a node, so nodes sharing that key reuse one rollout result.
        done_rollouts = {}
        for roll_node in new_nodes:
            if tuple(sorted(roll_node.untried_actions)) in done_rollouts.keys():
                backpropagate(roll_node, done_rollouts[tuple(sorted(roll_node.untried_actions))])
            else:
                # rollout's result is indexed by the bot's identity
                # (presumably a per-player score mapping -- TODO confirm).
                won = rollout(board, board.next_state(sampled_game, roll_node.parent_action))[identity_of_bot]
                backpropagate(roll_node, won)
                if won == 1:
                    explore_faction += .25  # if a game is won, seek out games along this path.
                elif won == 0 and explore_faction > 0.25:
                    explore_faction -= 0.25  # if a game is lost, seek fewer games from this path.
                done_rollouts[tuple(sorted(roll_node.untried_actions))] = won
    # Return an action, typically the most frequently used action (from the root) or the action with the best
    # estimated win rate.
    maximum = -1
    # Falls back to the root itself, whose parent_action is None.
    max_node = root_node
    for child in root_node.child_nodes.values():
        # print("Score: ", child.wins / child.visits)
        if child is not None and (child.wins / child.visits) > maximum:
            max_node = child
            maximum = (child.wins / child.visits)
    # print("Max Node: ", max_node, " - ", max_node.wins/max_node.visits)
    return max_node.parent_action
def think(board, state):
    """ Performs MCTS by sampling games and calling the appropriate functions to construct the game tree.

    After the search the chosen action is also recorded in the module-level
    `micro_actions` mapping, keyed by the first two elements of the action.

    Args:
        board:  The game setup.
        state:  The state of the game.

    Returns:    The action of the most visited root child (None if no child
                has been visited).

    """
    identity_of_bot = board.current_player(state)
    root_node = MCTSNode(parent=None, parent_action=None, action_list=board.legal_actions(state))
    max_child_visits = 0  # May belong inside of below loop, took it out during testing, never got that far tho
    selected_action = None
    # NOTE(review): next_state is initialised once and never reset inside the
    # loop, so it keeps accumulating moves across iterations -- confirm intent.
    next_state = state
    for step in range(num_nodes):
        # Copy the game for sampling a playthrough
        sampled_game = state
        # Start at root
        node = root_node
        # max_child_visits = 0
        # selected_action = None
        # Do MCTS - This is all you!
        # Note the argument order used for traverse_nodes in this variant.
        child_node = traverse_nodes(node, sampled_game, identity_of_bot, board)
        # Check whether the game is over at the selected node (for a non-root
        # node, after applying the action that led to it).
        if child_node.parent != None:
            check_win_state = board.next_state(next_state, child_node.parent_action)
            has_won = board.is_ended(check_win_state)
        else:
            has_won = board.is_ended(next_state)
        # Expand, simulate and back up only while the game is still running.
        if not has_won:
            expanded_node = expand_leaf(child_node, board, next_state)
            next_state = board.next_state(next_state, expanded_node.parent_action)
            win_dict = rollout(next_state, board)
            backpropagate(expanded_node, win_dict)
        continue
    # Return an action, typically the most frequently used action (from the root) or the action with the best
    # estimated win rate.
    for children in root_node.child_nodes.values():
        if children.visits > max_child_visits:
            max_child_visits = children.visits
            selected_action = children.parent_action
    # Record the chosen action: elements 0-1 form the key, elements 2-3 the
    # appended value. Fails with a TypeError when selected_action is None.
    micro_actions[(selected_action[0], selected_action[1])].append((selected_action[2], selected_action[3]))
    ##print("test", micro_actions)
    ##print("selected action", selected_action)
    return selected_action
def think(board, state):
    """ Builds an MCTS tree from the given state and returns the best-rated root action.

    Args:
        board:  The game setup.
        state:  The state of the game.

    Returns:    The root action whose child has the highest wins/visits ratio
                (the last one wins ties), or None when the root has no child.

    """
    identity_of_bot = board.current_player(state)

    # The root is created bare; its untried actions come from fun_board.
    root_node = MCTSNode(parent=None, parent_action=None)
    root_node.untried_actions = fun_board.legal_actions(state)
    node = root_node

    for step in range(num_nodes):
        # Each iteration restarts at the root with the input state attached.
        root_node.state = state
        node = traverse_nodes(root_node, state, identity_of_bot)
        leaf_node = expand_leaf(node, state)
        finished_game = rollout(leaf_node.state)

        # Only an explicit score of 1 for the bot counts as a win; a missing
        # result counts as a loss.
        outcome = board.win_values(finished_game)
        won = outcome is not None and outcome[identity_of_bot] == 1
        backpropagate(leaf_node, won)

    best_action = None
    best_ratio = 0
    for action, child in root_node.child_nodes.items():
        ratio = child.wins / child.visits
        if ratio >= best_ratio:
            best_action = action
            best_ratio = ratio

    # global root_node
    # root_node = root_node.child_nodes[best_action]
    if best_action is None:
        # Debug output for the case where no action could be chosen.
        print(node)
        print(best_action)
    return best_action
def think(board, state):
    """ Builds an MCTS tree from the given state and returns the best-rated root action.

    Args:
        board:  The game setup.
        state:  The state of the game.

    Returns:    The chosen root action, or None when no child qualified.

    """
    identity_of_bot = board.current_player(state)
    root_node = MCTSNode(parent=None, parent_action=None,
                         action_list=board.legal_actions(state),
                         player=identity_of_bot)

    for step in range(num_nodes):
        # Selection from the root, then rebuild the state for the chosen node.
        node = traverse_nodes(root_node, board, state, identity_of_bot)
        sampled_game = update_state(node, board, state)

        # Expansion, again followed by rebuilding the matching state.
        node = expand_leaf(node, board, sampled_game)
        sampled_game = update_state(node, board, state)

        # A falsy node ends the search early.
        if not node:
            break
        won = rollout(board, sampled_game)
        backpropagate(node, won, identity_of_bot)

    # With the completed tree, get the action with the best rate.
    best_action = None
    best_rate = float('-inf')
    highest_visits = 0
    for action, child in root_node.child_nodes.items():
        # A child owned by the other player stores wins from that player's
        # view, so the bot's wins are the complement.
        if child.player != identity_of_bot:
            child_wins = child.visits - child.wins
        else:
            child_wins = child.wins
        child_visits = child.visits
        child_rate = child_wins / child_visits
        # A child must beat the best rate AND have at least as many visits.
        if child_visits >= highest_visits and child_rate > best_rate:
            best_action = action
            best_rate = child_rate
            highest_visits = child_visits

    print("MCTS Vanilla bot " + str(identity_of_bot) + " picking %s with expected win rate %f" % (str(best_action), best_rate))
    return best_action
def onClick(self, event):
    """ Handles a click on the board: places the player's piece, then lets the MCTS AI reply.

    Clicks that do not map to an intersection (get_intersection yields -1) or
    that hit an occupied cell are ignored. After each placed piece the board
    state is checked for a winner.

    Args:
        event:  The click event; its x and y attributes are read.

    """
    x,y = self.get_intersection(event.x, event.y)
    if (x != -1 and y != -1):
        board_coords = self.get_board_coordinates(x, y)
        # Before the first move no board is fetched; the first move is always
        # accepted by the condition below.
        if not self.first_move:
            board = self.board_state.get_board()
        if self.first_move or board[board_coords[1]][board_coords[0]] == 0:
            # player is able to place piece
            if self.first_move:
                self.first_move = False
                self.placed_pieces.append(Piece(board_coords[1], board_coords[0], self.player_turn, self.placePiece(x, y), self))
                # The first move builds a fresh grid containing only this stone.
                new_board = [[0] * BOARD_SIZE for _ in range(BOARD_SIZE)]
                new_board[board_coords[1]][board_coords[0]] = BLACK
                self.board_state = BoardState(grid=new_board, recent_move=(board_coords[1], board_coords[0]), turn=BLACK, search_breadth=1)
                self.past_board_states.append(self.board_state)
                self.player_turn = (-1)*self.player_turn
            else:
                self.placed_pieces.append(Piece(board_coords[1], board_coords[0], self.player_turn, self.placePiece(x, y), self))
                # NOTE(review): here the state is appended to the history
                # BEFORE the move is played, unlike the first-move branch.
                self.past_board_states.append(self.board_state)
                self.board_state = self.board_state.play(board_coords[1],board_coords[0])
                self.player_turn = (-1)*self.player_turn
            # A non-zero result from get_winner means the game is decided.
            possible_winner = self.board_state.get_winner()
            if possible_winner != 0:
                self.winner(possible_winner)
            else:
                # Let the AI search from the current state for WAIT_TIME.
                ai_mcts_node = MCTSNode(self.board_state)
                ai_mcts_tree = MCTSTree(ai_mcts_node)
                next_state = ai_mcts_tree.best_move(time_cutoff=WAIT_TIME)
                self.board_state = next_state
                ai_move = next_state.get_recent_move()
                print("("+str(ai_move[0])+", "+str(ai_move[1])+")")
                # Indices are swapped and scaled by grid_interval (offset by
                # one) to obtain the canvas position of the AI's piece.
                self.placed_pieces.append(Piece(ai_move[1], ai_move[0], self.player_turn, self.placePiece((ai_move[1]+1)*self.grid_interval, (ai_move[0]+1)*self.grid_interval), self))
                self.past_board_states.append(self.board_state)
                self.player_turn = (-1) * self.player_turn
                possible_winner = self.board_state.get_winner()
                if possible_winner != 0:
                    self.winner(possible_winner)
def think(board, state):
    """ Builds an MCTS tree from the given state and returns the best-rated root action.

    Args:
        board:  The game setup.
        state:  The state of the game.

    Returns:    The action of the root child with the highest wins/visits
                ratio (the last one wins ties), or None without children.

    """
    identity_of_bot = board.current_player(state)
    root_node = MCTSNode(parent=None, parent_action=None,
                         action_list=board.legal_actions(state))

    for step in range(num_nodes):
        # Each playthrough restarts at the root with the input state.
        sampled_game = state
        node = root_node

        leaf = traverse_nodes(root_node, board, state, identity_of_bot)
        # Without untried actions nothing is expanded; node then stays the
        # root and the rollout starts from the input state.
        if leaf.untried_actions:
            node, sampled_game = expand_leaf(leaf, board, sampled_game)

        sampled_game = rollout(board, sampled_game)
        # A win is recorded when the player to move in the final state is
        # not the bot.
        won = board.current_player(sampled_game) != identity_of_bot
        backpropagate(node, won)

    # Return the action with the best estimated win rate.
    action = None
    best = 0.0
    for child in root_node.child_nodes.values():
        value = float(child.wins) / float(child.visits)
        if value >= best:
            best = value
            action = child.parent_action
    return action
def think(state):
    """ Builds an MCTS tree from the given state and returns the best-rated root action.

    Args:
        state:  The state of the game.

    Returns:    The action of the root child with the highest wins/visits ratio.

    """
    root_node = MCTSNode(parent=None, parent_action=None,
                         action_list=state.legal_moves)

    for step in range(num_nodes):
        # Work on a private copy of the game for each playthrough.
        sampled_game = state.copy()

        # Traverse from the root, then play the sampled game out.
        node = traverse_nodes(root_node, sampled_game)
        rollout(sampled_game)

        # The winner is read from the sampled game after the rollout.
        backpropagate(node, sampled_game.winner)

    def win_rate(child):
        return child.wins / child.visits

    # max() raises ValueError when the root has no children.
    best_child = max(list(root_node.child_nodes.values()), key=win_rate)
    return best_child.parent_action
def expand_leaf(node, game, state):
    """ Grows the tree by one node below the given node.

    A random untried action is taken off the node, applied to the state, and
    a child holding the legal actions of the resulting state is attached.

    Returns the newly attached child node.
    """
    picked = choice(node.untried_actions)
    successor = game.next_state(state, picked)
    node.untried_actions.remove(picked)

    node.child_nodes[picked] = MCTSNode(node, picked, game.legal_actions(successor))
    return node.child_nodes[picked]
def expand_leaf(node, board, state):
    """ Adds a new leaf to the tree by creating a new child node for the given node.

    Args:
        node:   The node for which a child will be added.
        board:  The game setup.
        state:  The state of the game.

    Returns:    The added child node.

    """
    action = choice(node.untried_actions)
    # Once expanded, the action must not be offered for expansion again
    # (consistent with the other expand_leaf variants in this file).
    node.untried_actions.remove(action)

    # The third MCTSNode argument is the list of legal actions in the other
    # variants, not the state itself, so derive the actions from the successor
    # state. NOTE(review): assumes this file's MCTSNode takes
    # (parent, parent_action, action_list) -- confirm against its definition.
    next_state = board.next_state(state, action)
    new_child = MCTSNode(node, action, board.legal_actions(next_state))
    node.child_nodes[action] = new_child
    return new_child
def think(board, state):
    """ Performs MCTS by sampling games and calling the appropriate functions to construct the game tree.

    Args:
        board:  The game setup.
        state:  The state of the game.

    Returns:    The root action with the highest positive wins/visits ratio,
                or None when no child has a ratio above zero.

    """
    identity_of_bot = board.current_player(state)
    root_node = MCTSNode(parent=None, parent_action=None,
                         action_list=board.legal_actions(state))

    for step in range(num_nodes):
        # Copy the game for sampling a playthrough and start at the root.
        sampled_game = state
        node = root_node

        child_node, sampled_game = traverse_nodes(node, board, sampled_game, True)
        expanded_node, sampled_game = expand_leaf(child_node, board, sampled_game)
        sampled_game = rollout(board, sampled_game)

        # Result codes handed to backpropagate:
        #   0: node is an end point, 1: player has won, 2: player has lost.
        # NOTE(review): the original author was unsure whether recording an
        # end point as 0 is the right treatment -- confirm with backpropagate.
        if not expanded_node.untried_actions:
            backpropagate(expanded_node, 0)
        # Compare by value; `is 1` relies on CPython's small-int caching.
        elif board.points_values(sampled_game)[identity_of_bot] == 1:
            backpropagate(expanded_node, 1)
        else:
            backpropagate(expanded_node, 2)

    # Select an action after MCTS has built the tree. Read the children from
    # root_node directly: the loop-local `node` is unbound when num_nodes is 0.
    win_rate = 0
    best_action = None
    for action, child in root_node.child_nodes.items():
        child_wr = child.wins / child.visits
        if child_wr > win_rate:
            win_rate = child_wr
            best_action = action

    print("Vanilla bot picking %s with expected score %f" % (str(best_action), win_rate))
    return best_action
def expand_leaf(node, board, state):
    """ Grows the tree by one node below the given node.

    Args:
        node:   The node that receives a new child.
        board:  The game setup.
        state:  The state of the game.

    Returns:    The newly created child node.

    """
    picked = choice(node.untried_actions)

    # The child's action list comes from the state reached by the picked action.
    child_actions = board.legal_actions(board.next_state(state, picked))
    child = MCTSNode(parent=node, parent_action=picked, action_list=child_actions)

    node.child_nodes[picked] = child
    node.untried_actions.remove(picked)
    return child
def think(board, state):
    """ Performs MCTS by sampling games and calling the appropriate functions to construct the game tree.

    Args:
        board:  The game setup.
        state:  The state of the game.

    Returns:    The root action with the highest wins/visits ratio above -1,
                or None when no visited child qualifies.

    """
    identity_of_bot = board.current_player(state)
    root_node = MCTSNode(parent=None, parent_action=None,
                         action_list=board.legal_actions(state))

    for step in range(num_nodes):
        # Copy the game for sampling a playthrough
        sampled_game = state

        leaf_node = traverse_nodes(root_node, board, sampled_game, identity_of_bot)
        child_node = expand_leaf(leaf_node, board, state)

        # A new child without untried actions ends the search.
        if len(child_node.untried_actions) == 0:
            break

        # NOTE(review): the rollout starts from the input state plus the
        # parent's first remaining untried action, not from the expanded
        # child's own state; actions[0] also fails once the parent has no
        # untried actions left -- confirm the intended behavior.
        parent_node = child_node.parent
        actions = parent_node.untried_actions
        wins = rollout(board, board.next_state(state, actions[0]))

        # Backpropagation: update the new child and every ancestor.
        temp = child_node
        temp.visits += 1
        temp.wins += wins
        while temp.parent is not None:
            temp = temp.parent
            temp.visits += 1
            temp.wins += wins
        root_node = temp

    best = None
    high = -1
    for action, child in root_node.child_nodes.items():
        # Skip children that were never backed up (avoids a division by zero).
        if action is None or not child.visits:
            continue
        score = child.wins / child.visits
        if score > high:
            # Track the running maximum; without this update the last
            # qualifying child was returned instead of the best one.
            high = score
            best = action
    return best
def think(board, state):
    """ Builds an MCTS tree from the given state and returns the best-rated root action.

    Args:
        board:  The game setup.
        state:  The state of the game.

    Returns:    The root action whose visited child has the highest
                wins/visits ratio.

    """
    identity_of_bot = board.current_player(state)
    root_node = MCTSNode(parent=None, parent_action=None,
                         action_list=board.legal_actions(state))
    replies = {}  # (move, identity)

    for step in range(num_nodes):
        # Selection, always from the root and the input state.
        sampled_game, leaf_node = traverse_nodes(root_node, board, state, identity_of_bot)
        if not leaf_node:
            continue

        # Expansion
        sampled_game, leaf_node = expand_leaf(leaf_node, board, sampled_game)

        # Simulation; the returned move is currently unused.
        sampled_game, move = rollout(board, sampled_game, replies)

        # Backpropagation of the bot's own win value.
        backpropagate(leaf_node, board.win_values(sampled_game)[identity_of_bot])

        # Propagate last known good replies
        # if move:
        #     backpropagate_moves(move, replies, identity_of_bot, won)

    # Only children that were visited at least once are candidates; max()
    # raises ValueError when there are none.
    visited = [(action, child)
               for action, child in root_node.child_nodes.items()
               if child.visits > 0]
    best_action, _ = max(visited, key=lambda pair: pair[1].wins / pair[1].visits)
    return best_action
def think(board, state):
    """ Performs MCTS by sampling games and calling the appropriate functions to construct the game tree.

    Args:
        board:  The game setup.
        state:  The state of the game.

    Returns:    The action of the root child with the highest positive
                wins/visits ratio, or of a random root child when no ratio is
                above zero.

    """
    identity_of_bot = board.current_player(state)
    root_node = MCTSNode(parent=None, parent_action=None,
                         action_list=board.legal_actions(state))

    for step in range(num_nodes):
        # Reset the state so it excludes previously expanded leaves.
        sampled_game = state
        node = root_node

        # Selection. The tree is traversed once per iteration; a second,
        # earlier call whose result was never used has been removed.
        # NOTE(review): this assumes traverse_nodes has no side effects.
        current, sampled_game = traverse_nodes(node, board, sampled_game, identity_of_bot)

        # Expansion to a new leaf.
        new_leaf, sampled_game = expand_leaf(current, board, sampled_game)

        # Rollout (heuristic-guided according to the original comment).
        heuristic_think = rollout(board, sampled_game, identity_of_bot)

        # Backpropagate
        backpropagate(new_leaf, heuristic_think)

    # Random fallback; replaced below by any child with a ratio above zero.
    new_leaf = choice(list(root_node.child_nodes.values()))
    optimal_win_amount = 0
    for child in root_node.child_nodes.values():
        win_amount = child.wins / child.visits
        if win_amount > optimal_win_amount:
            optimal_win_amount = win_amount
            new_leaf = child

    # The action that led from the root to the chosen child.
    return new_leaf.parent_action
def expand_leaf(node, board, state):
    """ Grows the tree by one node below the given node.

    The first untried action of the node is consumed (no random choice).

    Args:
        node:   The node that receives a new child.
        board:  The game setup.
        state:  The state of the game.

    Returns:    The newly created child node.

    """
    # Take the action at the front of the list and drop it from the list.
    first_action = node.untried_actions[0]
    del node.untried_actions[0]

    successor = board.next_state(state, first_action)
    node.child_nodes[first_action] = MCTSNode(node, first_action,
                                              board.legal_actions(successor))
    return node.child_nodes[first_action]
def expand_leaf(node, state):
    """ Grows the tree by one node below the given node.

    The chosen move is applied to `state` via apply_move, so the caller's
    state object is advanced as a side effect.

    Args:
        node:   The node that receives a new child.
        state:  The state of the game.

    Returns:    The newly created child node.

    """
    move = choice(node.untried_actions)
    state.apply_move(move)

    # needs fixing (original author's remark on the node construction)
    child = MCTSNode(parent=node, parent_action=move, action_list=state.legal_moves)

    node.untried_actions.remove(move)
    node.child_nodes[move] = child
    return child
def think(board, state):
    """ Performs MCTS by sampling games and calling the appropriate functions to construct the game tree.

    Args:
        board:  The game setup.
        state:  The state of the game.

    Returns:    The action of the root child with the highest positive
                wins/visits ratio, or of a random root child when no ratio is
                above zero.

    """
    identity_of_bot = board.current_player(state)
    root_node = MCTSNode(parent=None, parent_action=None,
                         action_list=board.legal_actions(state))

    # (An unused start-time measurement was removed here.)
    for step in range(num_nodes):
        # Copy the game for sampling a playthrough and start at the root.
        sampled_game = state
        node = root_node

        # Selection, expansion, simulation -- each step hands back the state.
        curr_node, sampled_game = traverse_nodes(node, board, sampled_game, identity_of_bot)
        new_child, sampled_game = expand_leaf(curr_node, board, sampled_game)
        sampled_game = rollout(board, sampled_game)

        # Back up the bot's own point value.
        won = board.points_values(sampled_game)[identity_of_bot]
        backpropagate(new_child, won)

    # Random fallback; replaced below by any child with a ratio above zero.
    best_winrate = 0
    rdm_node = choice(list(root_node.child_nodes.values()))
    for child in root_node.child_nodes.values():
        winrate = child.wins / child.visits
        if winrate > best_winrate:
            best_winrate = winrate
            rdm_node = child

    print("mcts_vanilla picking %s" % (str(rdm_node.parent_action)))
    return rdm_node.parent_action
def think(board, state):
    """ Builds an MCTS tree from the given state and returns the best-rated root action.

    Args:
        board:  The game setup.
        state:  The state of the game.

    Returns:    The action of the root child with the highest positive
                wins/visits ratio, or of a random root child when no ratio is
                above zero.

    """
    identity_of_bot = board.current_player(state)
    root_node = MCTSNode(parent=None, parent_action=None,
                         action_list=board.legal_actions(state))

    for step in range(num_nodes):
        # Selection and expansion, each handing back the matching state.
        current_node, sampled_game = traverse_nodes(root_node, board, state, identity_of_bot)
        next_node, sampled_game = expand_leaf(current_node, board, sampled_game)

        # Simulation; only a score of exactly 1 for the bot counts as a win.
        sampled_game = rollout(board, sampled_game)
        score = board.points_values(sampled_game)
        if score[identity_of_bot] == 1:
            won = 1
        else:
            won = 0
        backpropagate(next_node, won)

    # Random fallback; replaced below by any child with a ratio above zero.
    chosen = choice(list(root_node.child_nodes.values()))
    max_winrate = 0
    for candidate in root_node.child_nodes.values():
        winrate = candidate.wins / candidate.visits
        if winrate > max_winrate:
            chosen, max_winrate = candidate, winrate
    return chosen.parent_action
def think(board, state):
    """ Builds an MCTS tree from the given state and returns the best-rated root action.

    Args:
        board:  The game setup.
        state:  The state of the game.

    Returns:    The action of the root child with the highest wins/visits ratio.

    """
    identity_of_bot = board.current_player(state)
    root_node = MCTSNode(parent=None, parent_action=None,
                         action_list=board.legal_actions(state))

    for step in range(num_nodes):
        # traverse_nodes also reports a flag: 0 means the reached node has no
        # unused action and no children, so it is used as-is.
        parent_node, id_return, parent_state = traverse_nodes(
            root_node, board, state, identity_of_bot)

        if id_return != 0:
            child_node, next_state = expand_leaf(parent_node, board, parent_state)
        else:
            child_node, next_state = parent_node, parent_state

        # The rollout result is indexed by player; back up the bot's entry.
        num = rollout(board, next_state)
        backpropagate(child_node, num[identity_of_bot])

    # Pick the child with the best wins/visits ratio; max() raises
    # ValueError when the root has no children.
    children = list(root_node.child_nodes.values())
    best_child = max(children, key=lambda child: child.wins / child.visits)
    return best_child.parent_action
def expand_leaf(node, board, state):
    """ Grows the tree by one node below the given node.

    Args:
        node:   The node that receives a new child.
        board:  The game setup.
        state:  The state of the game.

    Returns:    The newly created child node.

    """
    move = choice(node.untried_actions)

    # The child's action list comes from the state reached by the move.
    child_actions = board.legal_actions(board.next_state(state, move))
    node.child_nodes[move] = child = MCTSNode(node, move, child_actions)

    # The move is now tried; take it off the parent's list.
    node.untried_actions.remove(move)
    return child
def expand_leaf(node, board, state):
    """ Grows the tree by one node below the given node.

    Args:
        node:   The node that receives a new child.
        board:  The game setup.
        state:  The state of the game.

    Returns:    A (child node, successor state) pair for the chosen action.

    """
    picked = choice(node.untried_actions)
    successor = board.next_state(state, picked)

    child = MCTSNode(node, picked, board.legal_actions(successor))
    node.child_nodes[picked] = child
    node.untried_actions.remove(picked)

    return child, successor
def think(board, state):
    """ Builds an MCTS tree from the given state and returns the best-rated root action.

    Args:
        board:  The game setup.
        state:  The state of the game.

    Returns:    The action of the root child with the highest wins/visits ratio.

    """
    identity_of_bot = board.current_player(state)
    root_node = MCTSNode(parent=None, parent_action=None,
                         action_list=board.legal_actions(state))

    for step in range(num_nodes):
        # Selection, expansion and simulation; the state travels with the node.
        node, selected_state = traverse_nodes(root_node, board, state, identity_of_bot)
        node, expanded_state = expand_leaf(node, board, selected_state)
        rollout_state = rollout(board, expanded_state, identity_of_bot)

        # Map the bot's point value to 1 (win), 0, or -1 (anything else).
        point = board.points_values(rollout_state)
        won = -1
        if point[identity_of_bot] == 1:
            won = 1
        elif point[identity_of_bot] == 0:
            won = 0
        backpropagate(node, won)

    # Rate every child of the root and take the best one; max() raises
    # ValueError when the root has no children.
    win_rate = {child: child.wins / child.visits
                for child in root_node.child_nodes.values()}
    winner = max(win_rate, key=win_rate.get)
    return winner.parent_action
def expand_leaf(node, state, board):
    """ Grows the tree by one node below the given node.

    Note the parameter order of this variant: state comes before board.

    Args:
        node:   The node that receives a new child.
        state:  The state of the game.
        board:  The game setup.

    Returns:    The newly created child node.

    """
    picked = choice(node.untried_actions)
    successor = board.next_state(state, picked)

    node.child_nodes[picked] = MCTSNode(node, picked, board.legal_actions(successor))
    node.untried_actions.remove(picked)
    return node.child_nodes[picked]
def expand_leaf(node, state):
    """ Grows the tree by one node below the given node.

    The node's untried actions are shuffled in place and the last one is
    consumed; the move is applied to `state` via apply_move.

    Args:
        node:   The node that receives a new child.
        state:  The state of the game.

    Returns:    A (child node, state) pair; the state is the same object that
                was passed in.

    """
    shuffle(node.untried_actions)
    move = node.untried_actions.pop()
    state.apply_move(move)

    child = MCTSNode(parent=node, parent_action=move, action_list=state.legal_moves)
    node.child_nodes[move] = child
    return child, state
def expand_leaf(node, board, state):
    """ Grows the tree by one node below the given node.

    The first untried action of the node is consumed (no random choice).

    Args:
        node:   The node that receives a new child.
        board:  The game setup.
        state:  The state of the game.

    Returns:    The newly created child node.

    """
    # Take the action at the front of the list and drop it from the list.
    first_action = node.untried_actions.pop(0)

    child_actions = board.legal_actions(board.next_state(state, first_action))
    node.child_nodes[first_action] = MCTSNode(parent=node,
                                              parent_action=first_action,
                                              action_list=child_actions)
    return node.child_nodes[first_action]