def main():
    """Play 100 Tic-Tac-Toe games of MCTS against a TD value-function agent.

    MCTS moves whenever the move counter is even (so it is the first player);
    the TD agent, driven by the pickled value function, moves on odd counts.
    A per-game result line and a final tally are printed.
    """
    mcts_wins = 0
    draws = 0
    mcts_loses = 0
    for game in range(100):
        # The value function is reloaded for every game, exactly as before;
        # the context manager makes sure each file handle is closed again.
        # NOTE(review): pickle.load can execute arbitrary code -- only load
        # value-function files that come from a trusted source.
        with open(
                "/Users/lpothabattula/Desktop/100_Days_of_ML_Code/Day4/TicTacToeValFun.pickle",
                "rb") as VF_pickle:
            ValueFunction = pickle.load(VF_pickle)
        BoardObj = TicTacToe()
        currNode = Node(expanded=False,
                        visited=True,
                        TotalSimualtionReward=0,
                        totalNumVisit=1,
                        TicTacToe=BoardObj,
                        parent=None)
        while not currNode.Terminal:
            player = currNode.TicTacToe.moveCnt % 2 + 1
            if currNode.TicTacToe.moveCnt & 1:
                # Odd move count: the TD agent picks a cell from its value table.
                x, y = take_action(currNode.TicTacToe.board,
                                   Value=ValueFunction,
                                   player=player)
                TicTacToeObj = cp.deepcopy(currNode.TicTacToe)
                TicTacToeObj.make_move(x, y)
                # Reuse the matching child of the current node when one exists,
                # otherwise start a fresh node for the new position.
                nextNode = currNode.compareTo(TicTacToeObj.board)
                if nextNode is None:
                    nextNode = Node(expanded=False,
                                    visited=True,
                                    TotalSimualtionReward=0,
                                    totalNumVisit=1,
                                    TicTacToe=TicTacToeObj,
                                    parent=None)
            else:
                # Even move count: MCTS chooses the next node.
                nextNode = MCTS.MonteCarloTreeSearch(currNode, 0.1)
            currNode = nextNode
        if currNode.TicTacToe.draw:
            draws += 1
            print("Match {}:Drawn".format(game))
        else:
            # An odd final move count means the first player (MCTS) moved last.
            if currNode.TicTacToe.moveCnt & 1:
                mcts_wins += 1
                print("Match {}:First Player won".format(game))
            else:
                mcts_loses += 1
                print("Match {}:Second Player won".format(game))
    print("Final analysis:MCTS vs TD")
    print("MCTS won {} times".format(mcts_wins))
    print("match drawn {} times".format(draws))
    print("MCTS lost {} times".format(mcts_loses))
def playGame(brainPower):
    """Simulate one Dots-and-Boxes game: random mover versus the MCTS player.

    Args:
        brainPower: multiplier applied to the per-move rollout budget.

    Returns:
        Tuple ``(P1Score, P2Score)`` read from the final board.
    """
    # Scratch game that seeds the root node of the search tree.
    tempGame = DotsAndBoxes.DotsAndBoxes()
    currentId = 0
    root = DBNode.DBNode(tempGame, currentId, -1, (-1, 0, 0))
    currentId += 1
    # The game tree is a plain dict keyed by node id.
    tree = {root.id: root}

    while True:
        if tempGame.player:
            # The rollout budget shrinks as more moves are on the board.
            movesPlayed = len(root.board.moves)
            for limit, budget in ((12, 12500), (16, 10000), (22, 7500)):
                if movesPlayed < limit:
                    rollouts = budget
                    break
            else:
                rollouts = 3000
            rollouts *= brainPower
            # NOTE(review): the search is run twice back to back with the same
            # arguments, as in the original -- confirm whether that is intended.
            for _ in range(2):
                nextComputerId, currentId = MCTS.MCTS(tree, currentId,
                                                      root.id, rollouts)
            root = tree[nextComputerId]
        else:
            # Random mover; the chosen child becomes the new root.
            nextNode = randomMove(root)
            root = tree[nextNode]

        if root.board.checkEnd():
            return (root.board.P1Score, root.board.P2Score)
def game_loop_vs_random_player(self):
    """Run a game in which a random player ("P1") faces the MCTS player.

    The board is printed after every move. The loop ends once
    ``self.game.is_done_phase`` becomes true.
    """
    print(self)
    mcts = MCTS()
    while True:
        player_to_play = self.game.player_turn
        random_player = RandomAzulPlayerPlus.RandomAzulPlayerPlus("P1")
        if player_to_play == "P1":
            random_player.set_board(self.game)
            pit_choice, tile_type, column_choice = random_player.random_action()
            # `self` is rebound to the board returned by make_move.
            self = self.make_move(player_to_play, pit_choice, tile_type,
                                  column_choice)
            print(self)
        else:
            # Otherwise MCTS plays.
            best_move = mcts.search(self)
            try:
                self = best_move.board
                print("MCTS played:", self.game.pit_choice,
                      self.game.color_choice, self.game.row_choice)
            except Exception:
                # Best effort, as before: a search result without a usable
                # board (the original comment suggests "game over") is skipped.
                # Unlike the former bare `except`, KeyboardInterrupt and
                # SystemExit now propagate.
                pass
            print(self)
        if self.game.is_done_phase:
            print("is done phase\n")
            break
def evaluateLeaf(self, leaf, value, done, breadcrumbs):
    """Expand a leaf with one edge per allowed action unless the game is done.

    When ``done`` is 0 the predictions for ``leaf.state`` replace ``value``
    and a child edge is appended to ``leaf.edges`` for every allowed action.

    Returns:
        Tuple ``(value, breadcrumbs)``.
    """
    if done != 0:
        return (value, breadcrumbs)

    value, probs, allowedActions = self.get_preds(leaf.state)
    probs = probs[allowedActions]

    for position, action in enumerate(allowedActions):
        newState, _, _ = leaf.state.takeAction(action)
        if newState.id in self.mcts.tree:
            child = self.mcts.tree[newState.id]
        else:
            # First visit to this state: register a fresh node in the tree.
            child = mc.Node(newState)
            self.mcts.addNode(child)
        leaf.edges.append(
            (action, mc.Edge(leaf, child, probs[position], action)))

    return (value, breadcrumbs)
def playPacmans(gameGrid, maze, pacmans, gameStats, real):
    """Advance the pacmans by one step.

    Args:
        gameGrid: grid handed to each pacman's ``move`` (or to MCTS).
        maze: 2-D structure of cells offering ``remove`` and ``place``.
        pacmans: iterable of pacman objects.
        gameStats: forwarded to ``place`` on the destination cell.
        real: when true the move is delegated to ``MCTS.MCTS(gameGrid).play()``;
            otherwise every pacman is moved directly on the maze.
    """
    if real:
        mcts = MCTS.MCTS(gameGrid)
        mcts.play()
        return

    # Iterate the pacmans directly instead of indexing through range(len(...)).
    for pacman in pacmans:
        pos = pacman.getCord()
        maze[pos[0]][pos[1]].remove("pacman")
        pos = pacman.move(gameGrid)
        maze[pos[0]][pos[1]].place("pacman", gameStats)
        pacman.cord = pos
def evaluate_leaf(self, leaf, value, over, backtrack):
    """Expand ``leaf`` with an edge per allowed move while the game is not over.

    When ``over`` is 0 the predictions for ``leaf.state`` replace ``value``
    and one edge per allowed move is appended to ``leaf.edges``.

    Returns:
        Tuple ``(value, backtrack)``.
    """
    if over != 0:
        return (value, backtrack)

    value, probabilities, allowed = self.get_predictions(leaf.state)
    # Keep only the probabilities that belong to allowed moves.
    move_probs = [probabilities[move] for move in allowed]

    for position, move in enumerate(allowed):
        new_state, _, _ = leaf.state.make_move(move)
        if new_state.id in self.mcts.tree:
            child = self.mcts.tree[new_state.id]
        else:
            child = mc.Node(new_state)
            self.mcts.add_node(child)
        leaf.edges.append(
            (move, mc.Edge(leaf, child, move_probs[position], move)))

    return (value, backtrack)
def __init__(self,
             agent_id,
             char_index,
             max_episode_length,
             num_simulation,
             max_rollout_steps,
             c_init,
             c_base,
             recursive=False,
             num_samples=1,
             num_processes=1,
             comm=None,
             logging=False,
             logging_graphs=False,
             seed=None):
    """Set up an MCTS agent with its own simulation environment and planner.

    Args:
        agent_id: identifier forwarded to the MCTS planner.
        char_index: character index forwarded to the MCTS planner.
        max_episode_length, num_simulation, max_rollout_steps, c_init, c_base:
            search settings stored on the agent and forwarded to ``MCTS``.
        recursive, num_samples, num_processes, logging, logging_graphs:
            stored on the agent as given.
        comm: stored as ``self.comm``; per the original comment it indicates
            whether there is a unity simulation.
        seed: stored as ``self.seed``; a random integer in [0, 100] is drawn
            when omitted.

    Raises:
        Exception: if the MCTS planner ends up being ``None``.
    """
    self.agent_type = 'MCTS'
    self.verbose = False
    self.recursive = recursive

    if seed is None:
        seed = random.randint(0, 100)
    self.seed = seed
    self.logging = logging
    self.logging_graphs = logging_graphs

    self.agent_id = agent_id
    self.char_index = char_index

    # Private simulation environment used for planning, switched to POMDP mode.
    self.sim_env = VhGraphEnv()
    self.sim_env.pomdp = True
    self.belief = None

    self.max_episode_length = max_episode_length
    self.num_simulation = num_simulation
    self.max_rollout_steps = max_rollout_steps
    self.c_init = c_init
    self.c_base = c_base
    self.num_samples = num_samples
    self.num_processes = num_processes

    self.previous_belief_graph = None

    self.mcts = MCTS(self.sim_env, self.agent_id, self.char_index,
                     self.max_episode_length, self.num_simulation,
                     self.max_rollout_steps, self.c_init, self.c_base)
    if self.mcts is None:
        raise Exception("MCTS planner could not be created")

    # Indicates whether there is a unity simulation
    self.comm = comm
def run(self, mcts_sim, games):
    """Play ``games`` self-play games, collecting training data after each one.

    Args:
        mcts_sim: simulation budget handed to ``MCTS.run`` for the first move
            of each game; it grows by ``self.sim_increment`` after every move.
        games: number of games to play.

    NOTE(review): the source was collapsed onto one line; the nesting of the
    save / buffer-trim / player-switch steps below is reconstructed -- confirm
    against the original file.
    """
    for i in range(games):
        print("Game number", i + 1)
        best_path = list()
        mcts_current = MCTS(self.hex_state, anet=self.anet, verbose=self.verbose)
        game_sim = mcts_sim
        # Keep searching and stepping while the game has no winner.
        while not mcts_current.root_node.state.check_finished():
            next_node = mcts_current.run(game_sim)
            best_path.append(next_node)
            next_state = next_node.state
            # A fresh search tree is rooted at the chosen state for the next move.
            mcts_current = MCTS(next_state, anet=self.anet, verbose=self.verbose)
            game_sim += self.sim_increment
        # Winner is derived from the player stored on the terminal state.
        winner = mcts_current.root_node.state.player % 2 + 1
        if winner == 1:
            self.p1_wins += 1
        else:
            self.p2_wins += 1
        print("Player", winner, "won!!")
        # One training case per move: the parent's board paired with the
        # distribution computed from the parent node.
        for node in best_path:
            label = create_distribution(node.parent)
            board = node.parent.state.Hex_to_list()
            net_board = node.parent.state.list_to_net(board)
            self.add_data(net_board, label)
        self.train()
        # Periodic checkpoint (never on the very first game).
        if i % self.save_int == 0 and i != 0:
            if self.preload:
                for case in self.buffer:
                    self.add_data_to_file("RBUF.txt", case[0], case[1])
            self.anet.save_model(self.file_add + str(i))
        # Periodically drop the first 500 buffered cases once the buffer is large.
        if i % self.buffer_clear == 0 and i != 0:
            if len(self.buffer) > 500:
                self.buffer = self.buffer[500:]
        self.hex_state.player = self.hex_state.change_player()
def __init__(self, master=None, height=0, width=0):
    """Build the chess-board canvas and the two Monte Carlo players.

    Args:
        master: parent widget handed to ``Tkinter.Canvas``.
        height: canvas height handed to ``Tkinter.Canvas``.
        width: canvas width handed to ``Tkinter.Canvas``.
    """
    Tkinter.Canvas.__init__(self, master, height=height, width=width)
    # Initialise the step recorder.
    self.step_record_chess_board = Record.Step_Record_Chess_Board()
    # After the canvas is created, height/width are overwritten with 15.
    self.height = self.width = 15
    self.init_chess_board_points()  # draw the points
    self.init_chess_board_canvas()  # draw the board

    self.board = MCTS.Board()
    self.n_in_row = 5
    self.n_playout = 400  # num of simulations for each move
    self.c_puct = 5

    # Important 1: both players receive the same Board object, so self.board
    # will be modified by their operations.
    self.AI, self.AI_1 = (MCTS.MonteCarlo(self.board, side) for side in (1, 0))

    self.clicked = 1
    # first place is given by user (later need to be replaced as a random selection)
    self.init = True
    self.train_or_play = True  # True - train, False - play
    self.step = 0
    self.text_id = None
def playing(model):
    """Play one game with MCTS guided by ``model`` and return the max tile value.

    Every step prints the step number and then each matrix row together with
    the chosen event.
    """
    gamegrid = puzzle.GameGrid()
    step = 0
    while True:
        _, event = MCTS.mcts_process(gamegrid.matrix, model)
        gamegrid.action(event)
        print("step: ", step)
        step += 1
        for row in gamegrid.matrix:
            print(row, event)
        if gamegrid.is_over:
            return gamegrid.max_value
def AI_move():
    """Let the white bot make one MCTS move and update board, counters and canvas.

    Nothing happens unless ``SIDE`` is falsy. The move returned by
    ``MCTS.MCT_step`` is read as ``ret[0]`` (old position) and ``ret[1]``
    (new position). Module-level state is updated in place and the turn is
    handed to black by setting ``SIDE = 1``.

    NOTE(review): nesting was reconstructed from a single-line source --
    confirm against the original file.
    """
    global total_time_cost
    global SIDE
    global left_side_mark
    global black_piece_count
    if not SIDE:  # white bot
        time_start = time.time()
        ret = MCTS.MCT_step(board_situation, black_piece, white_piece, line_count, black_piece_count, white_piece_count)
        time_end = time.time()
        time_cost = round(time_end - time_start, 4)
        total_time_cost += time_cost
        print(ret)
        print('totally cost = ', time_cost, 's')
        # Over 60 seconds only prints a message; the move is still applied.
        if time_cost > 60:
            print("black win")
        move_piece_index = white_piece.index(ret[0])
        oldx = ret[0][0]
        oldy = ret[0][1]
        posx = ret[1][0]
        posy = ret[1][1]
        # update board situation
        board_situation[oldx][oldy] = 0
        board_situation[posx][posy] = 1
        # The vacated square leaves each line it was counted in.
        # NOTE(review): the offsets 7, -1, 13 and 35 presumably select the
        # column / row / diagonal sections of line_count -- confirm.
        line_count[oldx + 7] = line_count[oldx + 7] - 1
        line_count[oldy - 1] = line_count[oldy - 1] - 1
        line_count[oldx + oldy + 13] = line_count[oldx + oldy + 13] - 1
        if oldx != 8 or oldy != 1:
            line_count[oldx - oldy + 35] = line_count[oldx - oldy + 35] - 1
        # update piece info
        white_piece[move_piece_index] = [posx, posy]
        # update canvas
        board.coords(white_in_canvas[move_piece_index], posx * 30 + 4, (9 - posy) * 30 + 4, posx * 30 + 26, (9 - posy) * 30 + 26)
        # eat piece
        if [posx, posy] in black_piece:  # one black out
            # Move the captured piece's canvas item to (301, 301) and mark the
            # piece with the placeholder position [114, 114].
            board.coords(black_in_canvas[black_piece.index([posx, posy])], 301, 301, 301, 301)  # move out
            black_piece[black_piece.index([posx, posy])] = [114, 114]
            black_piece_count = black_piece_count - 1
        else:
            # No capture: the destination square joins each line count.
            line_count[posx + 7] = line_count[posx + 7] + 1
            line_count[posy - 1] = line_count[posy - 1] + 1
            line_count[posx + posy + 13] = line_count[posx + posy + 13] + 1
            if posx != 8 or posy != 1:
                line_count[posx - posy + 35] = line_count[posx - posy + 35] + 1
        # change side to black
        SIDE = 1
        right_side.delete(right_side_mark)
        left_side_mark = left_side.create_oval(39, 139, 61, 161, fill="black")
def deal_request():
    """Handle the route: GET runs an MCTS route search, POST echoes ``q``.

    Returns:
        For GET, ``str`` of the best route found by the MCTS manager.
        For POST, the rendered ``result.html`` template with the posted ``q``.
        Other methods fall through and return ``None``.
    """
    if request.method == "GET":
        # GET parameters would be read via request.args.get("param_name", "").
        import sys
        ai_dir = "../AI/"
        # Register the AI directory only once; appending on every request
        # would grow sys.path without bound.
        if ai_dir not in sys.path:
            sys.path.append(ai_dir)
        import MCTS
        from datetime import datetime

        mcts_manager = MCTS.mcts(timeLimit=10000)
        # Start the MCTS program.
        print("初始化中...")
        start_datetime = datetime(2021, 2, 1, 9, 30, 0)
        end_datetime = datetime(2021, 2, 1, 20, 30, 0)
        initialstate = MCTS.State(nowspotname="横滨港未来21",
                                  travelpoint=0,
                                  totaltravelpoint=0,
                                  moneycost=0,
                                  onfoottime=0,
                                  now_datetime=start_datetime,
                                  end_datetime=end_datetime,
                                  hasbeenspots=[])
        print("初始化sucess")
        root = mcts_manager.search(initialState=initialstate)
        result = mcts_manager.getBestRoute(root)
        return str(result)
    elif request.method == "POST":
        # POST parameters are read via request.form["param_name"].
        post_q = request.form["q"]
        return render_template("result.html", result=post_q)
def getAction(self, simulator):
    """Run MCTS on ``simulator`` and return the action with the highest probability.

    The policy is stored in ``self.act_pro_pair``. Ties with the current best
    probability are broken by a coin flip; ``(-1, -1)`` is returned when no
    action has a probability above 0.
    """
    searcher = MCTS.MCTS(C=5)
    searcher.run(self.numOfiterations,
                 simulator,
                 self.network,
                 rolloutFn=self.rollout,
                 balance=self.balance)
    self.act_pro_pair = searcher.getPolicy()

    best_prob = 0
    chosen = (-1, -1)
    for candidate, prob in self.act_pro_pair.items():
        if prob > best_prob:
            best_prob, chosen = prob, candidate
            continue
        # Equal probability: replace the current choice half of the time.
        if prob == best_prob and np.random.random() > 0.5:
            chosen = candidate
    return chosen
def playing(cpuct, times):
    """Play one game with MCTS and return the largest tile value reached.

    Args:
        cpuct: forwarded to ``MCTS.mcts_process`` as ``cpuct``.
        times: forwarded to ``MCTS.mcts_process`` as ``update_times``.

    Returns:
        ``gamegrid.max_value`` at the moment the game is over.
    """
    gamegrid = puzzle.GameGrid()
    while True:
        event = MCTS.mcts_process(gamegrid.matrix,
                                  cpuct=cpuct,
                                  update_times=times)
        gamegrid.action(event)
        # The former step counter and the no-op loop over the matrix only
        # served commented-out debug prints and were removed.
        if gamegrid.is_over:
            return gamegrid.max_value
def select_action(input_state,
                  input_hidden_state,
                  actor_model,
                  critic_model=None,
                  teacher_model=None,
                  K=1,
                  use_MLE=False,
                  MCTS_thresh=0):
    """Apply the actor model to a state and choose the next action.

    The action is selected by MLE (arg-max), MCTS, or sampling:

    * ``use_MLE`` true: the most probable action is taken.
    * otherwise, if the highest probability is below ``MCTS_thresh``:
      ``MCTS.UCT_search`` decides. Note: MCTS only works during validation
      (when the model is not tracking gradients).
    * otherwise: the action is sampled from the categorical distribution.

    The log-probability of the chosen action is appended to
    ``actor_model.saved_action_values``; when a critic is given its state
    value is appended to ``critic_model.saved_state_values``.

    Returns:
        Tuple ``(action, hidden_state, kl)`` where ``kl`` is the policy
        distillation error as a Python number when ``teacher_model`` is given,
        else ``None``.
    """
    probs, hidden_state = actor_model(input_state, input_hidden_state)
    m = Categorical(probs)

    if use_MLE:
        # Use MLE instead of sampling the distribution.
        _, topi = probs.data.topk(1)
        action = topi.squeeze()
    elif torch.max(probs).detach() < MCTS_thresh:
        action, hidden_state, _ = MCTS.UCT_search(env, input_state,
                                                  input_hidden_state,
                                                  actor_model, critic_model,
                                                  5, env.action_space, 100)
        action = torch.tensor(action, device=config.DEVICE)
    else:
        action = m.sample()

    actor_model.saved_action_values.append(m.log_prob(action))

    if critic_model is not None:
        state_value = critic_model(input_state, input_hidden_state)
        critic_model.saved_state_values.append(state_value)

    if teacher_model is not None:
        # Add policy distillation error.
        actor_probs, _ = actor_model(input_state, input_hidden_state, K)
        supervised_probs, _ = teacher_model(input_state, input_hidden_state, K)
        KL_error = utils.KL_divergence(actor_probs, supervised_probs, K)
        return action, hidden_state, KL_error.item()

    return action, hidden_state, None
def MTCS_player():
    """Play ten games with the MCTS player and report the best max tile value.

    The helper modules are first copied into place with ``copy_files`` and
    ``MCTS`` is imported afterwards. Each game's window is destroyed once the
    game is over.
    """
    copy_files("MCTS.py")
    copy_files("constants.py")
    copy_files("puzzle.py")
    # Imported only after the copy above has made the module available.
    import MCTS

    value = []
    for i in range(10):
        gamegrid = GameGrid()
        # Truthiness test instead of `== False`.
        while not gamegrid.is_over:
            event = MCTS.mcts_process(gamegrid.matrix)
            gamegrid.action(event)
            gamegrid.update_grid_cells()
        print("%dth step's max value is %d" % (i, gamegrid.max_value))
        value.append(gamegrid.max_value)
        gamegrid.windows.destroy()
    print("*" * 50)
    print("the max value is ", max(value))
def __init__(self,
             numOfiterations,
             network,
             path,
             eta=1.0,
             decay=0.85,
             rollout=None,
             balance=0):
    """Store the search settings and create the MCTS instance.

    Args:
        numOfiterations: stored as ``self.numOfiterations``.
        network: wrapped in ``ExpandingFn`` and stored as ``self.network``.
        path: stored as ``self.path``.
        eta: stored and also forwarded to ``MCTS.MCTS``.
        decay, rollout, balance: stored as given.
    """
    # Empty containers and flags first.
    self.datalist = []
    self.finalDataList = []
    self.isFinished = 0

    # Plain configuration values.
    self.numOfiterations = numOfiterations
    self.path = path
    self.eta = eta
    self.decay = decay
    self.balance = balance
    self.rollout = rollout

    # Collaborators, built in the same relative order as before.
    self.network = ExpandingFn(network)
    self.mcts = MCTS.MCTS(eta=self.eta)
def create_game_data(new_game):
    """Self-play ``new_game`` to its end with MCTS, recording each position.

    After every move the board, end flag, winner and turn of the best node are
    copied onto ``new_game``. A deep copy of the game is recorded after each
    move that does not end the game.

    Returns:
        Tuple ``(recorded_copies, new_game)``.
    """
    snapshots = []
    while not new_game.end:
        search = MCTS.TreeSearch(new_game)
        search.search_tree(15)
        chosen = search.get_best_move().game

        new_game.board = chosen.board
        new_game.end = chosen.end
        new_game.winner = chosen.winner

        if new_game.end:
            # The finishing move advances the turn counter once more.
            new_game.turn = chosen.turn + 1
        else:
            new_game.turn = chosen.turn
            snapshots.append(
                new_game.game_deep_copy(new_game, new_game.color))
    return snapshots, new_game
def main():
    """Play one interactive Tic-Tac-Toe game: MCTS moves first, the human second.

    MCTS moves when the move counter is even; on odd counts the row and column
    are read from standard input. Invalid input or a rejected move simply
    prompts again. The board and node statistics are printed after every move,
    followed by the final result.
    """
    BoardObj = TicTacToe()
    currNode = Node(expanded=False,
                    visited=True,
                    TotalSimualtionReward=0,
                    totalNumVisit=1,
                    TicTacToe=BoardObj,
                    parent=None)
    print("Initial Board setting")
    currNode.TicTacToe.print_board()
    while not currNode.Terminal:
        if currNode.TicTacToe.moveCnt & 1:
            try:
                x = int(raw_input('Enter row position\n'))
                y = int(raw_input('Enter column position\n'))
            except ValueError:
                # Non-numeric input used to crash the game; ask again instead.
                continue
            TicTacToeObj = cp.deepcopy(currNode.TicTacToe)
            try:
                TicTacToeObj.make_move(x, y)
            except Exception:
                # Rejected move: ask again. Unlike the former bare `except`,
                # KeyboardInterrupt and SystemExit can still stop the program.
                continue
            # Reuse the matching child of the current node when there is one.
            nextNode = currNode.compareTo(TicTacToeObj.board)
            if nextNode is None:
                nextNode = Node(expanded=False,
                                visited=True,
                                TotalSimualtionReward=0,
                                totalNumVisit=1,
                                TicTacToe=TicTacToeObj,
                                parent=None)
        else:
            nextNode = MCTS.MonteCarloTreeSearch(currNode, 0.1)
        print("After {} Move".format(nextNode.TicTacToe.moveCnt))
        print(nextNode.TotalSimualtionReward)
        print(nextNode.TotalNumVisit)
        nextNode.TicTacToe.print_board()
        currNode = nextNode
    if currNode.TicTacToe.draw:
        print("Match is Drawn")
    else:
        # An odd final move count means the first player made the last move.
        if currNode.TicTacToe.moveCnt & 1:
            print("First Player won")
        else:
            print("Second Player won")
def generate_games(self,
                   episodes,
                   snapshots,
                   batch_size,
                   sim_time=0,
                   rollouts_per_move=0,
                   generate_random=False):
    """Generate self-play games, fill the replay buffer and train the actor.

    Args:
        episodes: number of games to generate.
        snapshots: episode numbers after which the actor is saved.
        batch_size: minibatch size handed to the actor's training call.
        sim_time: forwarded to ``MC.search`` as ``sim_time``.
        rollouts_per_move: forwarded to ``MC.search`` as ``simulations``.
        generate_random: use ``actors.Random`` instead of the actor itself
            as the rollout generator.
    """
    print("Net will be cahced after the following episodes:", snapshots)
    self.actor.save(self.name + "0")

    generator = (actors.Random(self.state_manager)
                 if generate_random else self.actor)

    for episode in range(1, episodes + 1):
        board = self.state_manager.get_start()
        MC = MCTS.MonteCarlo(self.start_player, self.state_manager, generator)

        winner = 0
        while winner == 0:
            # Search from the current root, then record (board, distribution).
            MC.search(sim_time=sim_time, simulations=rollouts_per_move)
            self.replay_buffer.append((board, MC.get_move_distribution()))

            # Step to the next state chosen by the search.
            board, move = MC.best_move()
            winner = self.state_manager.winner(board)
            if self.verbose:
                self.state_manager.print_move(move)
                self.state_manager.print_board(board)
                print()

            # Re-root the tree at the new board.
            MC.purge_tree(board)

        self.actor.train_network_random_minibatch(self.replay_buffer,
                                                  batch_size=batch_size)
        if episode in snapshots:
            self.actor.save(self.name + str(episode))
def __init__(self, auto):
    """Set up the players, the 8x8 start position and the game bookkeeping.

    Args:
        auto: when true, player 1 is the SL policy network loaded from
            ``./models/sl_model.npz``; otherwise player 1 is the user.
    """
    self.p1 = "IaGo(SLPolicy)" if auto else "You"
    if auto:
        self.model = network.SLPolicy()
        serializers.load_npz('./models/sl_model.npz', self.model)
    else:
        self.model = None
    self.p2 = "IaGo(PV-MCTS)"

    # Start position: four stones in the centre, the two values on diagonals.
    self.state = np.zeros([8, 8], dtype=np.float32)
    self.state[3:5, 3:5] = [[2, 1], [1, 2]]

    # Game variables.
    self.stone_num = 4
    self.play_num = 1
    self.pass_flg = False
    self.date = datetime.now().strftime("%Y-%m-%d-%H-%M")
    self.gamelog = "IaGo \n" + self.date + "\n"
    self.mcts = MCTS.MCTS()
def brain(self, board, opponent):
    """Pick a move with the strategy named by ``self.level``.

    ``'random'``, ``'minmax'`` and ``'minimax_alphabeta'`` use the matching
    method on ``self``; ``'MCTS3s'`` uses ``MCTS.MCTS``; every other level
    uses ``MCTS_selection.MCTS``.

    Returns:
        The chosen action, or ``[9, 9]`` when the strategy yields ``None``.
    """
    level = self.level
    if level == 'random':
        outcome = self.randomchoice(board)
    elif level == 'minmax':
        outcome = self.minimax(board, opponent)
    elif level == 'minimax_alphabeta':
        outcome = self.minimax_alpha_beta(board, opponent)
    else:
        searcher = MCTS.MCTS if level == 'MCTS3s' else MCTS_selection.MCTS
        outcome = searcher(board, opponent).get_action()

    _, action = outcome
    return [9, 9] if action is None else action
def play_game(self, net):
    """Play a complete game of the network against itself.

    Returns:
        List of per-turn tuples ``(input, policy, result)`` holding the
        network input (1x9x9xF), the policy reshaped to 1x81 and a 1x1 result
        array that is filled in once the game is over.
    """
    self.init_game()
    history = []
    while self.state == -1:
        net_input = self.get_convnet_input().reshape(
            (1, 9, 9, 2 * self.NUMBER_OF_SAVED_GAME_STATES + 1))
        action, policy = MCTS.run_mcts(self, net)
        history.append((net_input, policy.reshape((1, 81)), np.zeros((1, 1))))
        self.move(self.cnn_action_to_coords(action))

    # Fill in the value entry of every turn from the final game state,
    # alternating between the two players by turn parity.
    for ply, (_, _, outcome) in enumerate(history):
        if self.state == 0:  # draw
            outcome[0, 0] = 0.5
        elif self.state == 1:
            outcome[0, 0] = 1 - (ply % 2)
        elif self.state == 2:
            outcome[0, 0] = 1 - ((ply + 1) % 2)
    return history
def do_POST(self):
    """Answer a form-encoded POST carrying a board with an MCTS move as JSON.

    Form fields:
        board: board string parsed by ``Othello.str_to_board`` (required).
        wait_time: seconds per move for the search (optional, default 2).

    A 415 error is sent when the content type is not
    ``application/x-www-form-urlencoded`` or when ``board`` is missing.
    """
    ctype, pdict = cgi.parse_header(self.headers['content-type'])
    if ctype != 'application/x-www-form-urlencoded':
        self.send_error(
            415, "Only application/x-www-form-urlencoded data is supported.")
        return

    length = int(self.headers.getheader('content-length'))
    data = cgi.parse_qs(self.rfile.read(length), keep_blank_values=1)
    if "board" not in data:
        self.send_error(415, "No value named 'board'.")
        return

    board, player = Othello.str_to_board(data["board"][0])
    game = Othello(board=board, player=player)
    wait_time = int(data["wait_time"][0]) if "wait_time" in data else 2

    mcts = MCTS.MCTS(prior_prob=RandomNetwork(),
                     rollout_policy=RandomNetwork(),
                     seconds_per_move=wait_time)
    move = mcts.suggest_move(game)
    return_data = json.dumps({'move': move})

    self.send_response(200)
    self.send_header('Content-type', 'application/json')
    self.send_header('Access-Control-Allow-Origin', '*')
    self.end_headers()
    self.wfile.write(return_data)
def reset(self,
          observed_graph,
          gt_graph,
          task_goal,
          seed=0,
          simulator_type='python',
          is_alice=False):
    """Reset the agent for a new episode and rebuild its MCTS planner.

    Args:
        observed_graph: graph handed to ``self.sample_belief``.
        gt_graph: graph used to build the new ``belief.Belief``.
        task_goal: goal handed to the simulation environment's ``reset``.
        seed: seed for both the belief and the MCTS planner.
        simulator_type: not used by this method.
        is_alice: not used by this method.
    """
    self.last_action = None
    self.last_subgoal = None

    # TODO: do no need this?
    self.previous_belief_graph = None
    self.belief = belief.Belief(gt_graph, agent_id=self.agent_id, seed=seed)
    self.belief.sample_from_belief()
    graph_belief = self.sample_belief(observed_graph)

    try:
        self.sim_env.reset(graph_belief, task_goal)
    except Exception:
        # Debugging aid kept from the original: drop into ipdb when the
        # environment reset fails. Narrowed from a bare `except` so that
        # KeyboardInterrupt / SystemExit are no longer trapped.
        # NOTE(review): execution continues after the debugger session --
        # confirm whether the error should be re-raised instead.
        import ipdb
        ipdb.set_trace()

    self.sim_env.to_pomdp()
    self.mcts = MCTS(self.sim_env,
                     self.agent_id,
                     self.char_index,
                     self.max_episode_length,
                     self.num_simulation,
                     self.max_rollout_steps,
                     self.c_init,
                     self.c_base,
                     seed=seed)
def tick(self):
    """Run one commander tick: plan with MCTS and issue moves to the bots.

    NOTE(review): the source was collapsed onto one line; which statements
    sit inside the ``len(...) == 4`` branch is reconstructed -- confirm
    against the original file.
    """
    # At the start of the tick we update the state so the MCTS is
    # aware of the current bot positions.
    if self.initialise_bots == True:
        bot_positions = []
        for bot in self.game.bots_available:
            bot_positions.append(bot.position)
        self.game_state.SetInitialPositions(bot_positions)
        self.initialise_bots = False
    # Planning only happens when exactly four bots are available.
    if len(self.game.bots_available) == 4:
        m = MCTS.UCT(rootstate=self.game_state, itermax=10000, verbose=False)
        # Once we have the best move we issue it to any bots that are available.
        self.game_state.DoMoves(m)
        self.IssueMoves(m)
        # Count the explored corridor-graph nodes; the value was only used by
        # debug prints that are now disabled.
        total = 0
        for node in self.game_state.corridor_graph.nodes():
            if self.game_state.explored[node] == True:
                total = total + 1
        self.iterations += 1
    # Mark the window dirty and refresh it.
    self.window.dirty = True
    self.window.update()
def buildMCTS(self, state):
    """Create a new MCTS tree rooted at ``state`` for this agent.

    The new root node is stored in ``self.root`` and the tree, built with
    ``self.cpuct``, in ``self.mcts``.
    """
    lg.logger_mcts.info('****** BUILDING NEW MCTS TREE FOR AGENT %s ******',
                        self.name)
    root_node = mc.Node(state)
    self.root = root_node
    self.mcts = mc.MCTS(root_node, self.cpuct)
def human(player, board):
    """Prompt the human player until a legal move is entered and return it.

    The move is typed as two integers ``x y`` and converted to the board
    index ``x + y * 10 + 11``. Non-numeric input or a wrong number of values
    prints an "Illegal input" message and prompts again; an illegal move
    prints an "Illegal move" message and prompts again.

    NOTE(review): the source was collapsed onto one line; the final ``else``
    is taken to belong to the ``len(pos) == 2`` test -- confirm.
    """
    print("Note: 'x' denotes black disks, 'o' denotes white disks.")
    print("After computer's response, the board becomes:")
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(Othello.print_board(board))
    while True:
        try:
            pos = [int(i) for i in raw_input('your move in "x y"> ').split()]
        except ValueError:
            # Non-numeric input used to crash the prompt loop.
            print('Illegal input--try again.')
            continue
        if len(pos) == 2:
            move = pos[0] + pos[1] * 10 + 11
            if move and check(int(move), player, board):
                return int(move)
            elif move:
                print('Illegal move--try again.')
        else:
            print('Illegal input--try again.')


if __name__ == '__main__':
    model_human = human
    model_MCTS_random = MCTS.MCTS(prior_prob=RandomNetwork(),
                                  seconds_per_move=5,
                                  rollout_policy=RandomNetwork())
    # NOTE(review): the policy-network player is built but not used by the
    # game started below.
    model_MCTS_policy = MCTS.MCTS(
        prior_prob=PolicyNetwork(
            "./model/policy_model_L_conv5*128_conv3*128*4_20.h5"),
        rollout_policy=PolicyNetwork(
            "./model/policy_model_L_conv5*128_conv3*128*4_20.h5"),
        seconds_per_move=5)
    MCTS.play_with_MCTS(Othello(), model_human, model_MCTS_random)
wins = 0 losses = 0 winrate = [] for i in range(G): # actual games if i == 0 and save_networks: nn_policy.model.save("networks/mcts" + str(size) + "_" + str(G) + "_" + str(M) + "_" + str(i)) if i % (G / 10) == 0 and not verbose: print((i / G) * 100, "%", "done") mcts = MCTS(statemanager=stateman, initial_state=game.__copy__(), target_policy=nn_policy, default_policy=nn_policy, tree_policy=policy, M=M) winner = play_game(mcts, nn_policy) if verbose: print("Winner:", winner) if i != G - 1: replay_buffer.clear() if winner == initial_player: wins += 1 else: losses += 1 winrate.append(wins / (i + 1)) if ((i % (save_networks_interval) == 0 and i != 0)
] bar.next() bar.finish() print( "[logger]: Saving dictionary of neighbors to speedup next time." ) with open( neighbors_prefix_path + "neighbors_{}.pkl".format(test_pt), 'wb') as f: pickle.dump(nearest_neighbors, f, pickle.HIGHEST_PROTOCOL) # 4. create the MCTree and instantiate the search branch_factor = len(input_text) MCTS.MCTree.__simulate_single_node = __override_simulate_single_node_torch # override MCTS __simulate_single_node method MCTS.MCTree.simulate = __override_simulate # override MCTS simulate method tree = MCTS.MCTree(branch_factor, max_depth, n_sims, l_rate, discount) if np.argmax(model.predict(x)) != np.argmax(y): print( "[logger-ERROR]: Prediction and true label are different: can't proceed in the analysis." ) continue else: number_verified += 1 y_hat = np.max( model.predict(x) ) # this is used for the 'gain' and hence for the MCTS-UCT heuristc true_label = np.argmax(model.predict(x)) while tree.actual_depth != tree.max_depth: v_star = tree.select() print("Node selected {} (depth={}), whose parent is {}".format(