Пример #1
0
def main():
    """Play 100 Tic-Tac-Toe games between MCTS and the pickled value function.

    For every game the value function is loaded from the pickle file and a
    fresh root node is created.  While the current node is not terminal,
    odd move counts are played through ``take_action`` with the loaded value
    function and even move counts through ``MCTS.MonteCarloTreeSearch``.
    Per-game results and a final tally are printed; nothing is returned.
    """
    mcts_wins = 0
    draws = 0
    mcts_loses = 0
    for game in range(100):
        # The value function is still reloaded for every game, but the file
        # handle is now closed deterministically instead of being leaked.
        # NOTE(review): pickle.load can execute arbitrary code; only load
        # files from a trusted source.
        with open(
                "/Users/lpothabattula/Desktop/100_Days_of_ML_Code/Day4/TicTacToeValFun.pickle",
                "rb") as VF_pickle:
            ValueFunction = pickle.load(VF_pickle)
        BoardObj = TicTacToe()
        currNode = Node(expanded=False,
                        visited=True,
                        TotalSimualtionReward=0,
                        totalNumVisit=1,
                        TicTacToe=BoardObj,
                        parent=None)
        while not currNode.Terminal:
            player = currNode.TicTacToe.moveCnt % 2 + 1
            if currNode.TicTacToe.moveCnt & 1:
                # Odd move count: the value-function player chooses a cell.
                x, y = take_action(currNode.TicTacToe.board,
                                   Value=ValueFunction,
                                   player=player)
                # Apply the move on a copy so the current node stays intact.
                TicTacToeObj = cp.deepcopy(currNode.TicTacToe)
                TicTacToeObj.make_move(x, y)
                # Reuse a matching existing node when compareTo finds one,
                # otherwise start a fresh, parentless node.
                nextNode = currNode.compareTo(TicTacToeObj.board)
                if nextNode is None:
                    nextNode = Node(expanded=False,
                                    visited=True,
                                    TotalSimualtionReward=0,
                                    totalNumVisit=1,
                                    TicTacToe=TicTacToeObj,
                                    parent=None)
            else:
                # Even move count: MCTS picks the next node.
                nextNode = MCTS.MonteCarloTreeSearch(currNode, 0.1)
            currNode = nextNode
        if currNode.TicTacToe.draw:
            draws += 1
            print("Match {}:Drawn".format(game))
        else:
            # An odd final move count is reported as a first-player win and
            # counted for MCTS (which moves on even counts, i.e. first).
            if currNode.TicTacToe.moveCnt & 1:
                mcts_wins += 1
                print("Match {}:First Player won".format(game))
            else:
                mcts_loses += 1
                print("Match {}:Second Player won".format(game))
    print("Final analysis:MCTS vs TD")
    print("MCTS won {} times".format(mcts_wins))
    print("match drawn {} times".format(draws))
    print("MCTS lost {} times".format(mcts_loses))
Пример #2
0
def playGame(brainPower):
    """Play one Dots-and-Boxes game: random mover versus the MCTS player.

    ``brainPower`` multiplies the rollout budget handed to ``MCTS.MCTS``.
    Returns the tuple ``(P1Score, P2Score)`` read from the final board.
    """
    # scratch game that backs the root node
    tempGame = DotsAndBoxes.DotsAndBoxes()

    # node ids are handed out sequentially, the root taking the first one
    currentId = 0
    root = DBNode.DBNode(tempGame, currentId, -1, (-1, 0, 0))
    currentId += 1

    # the game tree, stored as a dictionary keyed by node id
    tree = {root.id: root}

    while True:
        if tempGame.player:
            # rollout budget depends on the length of the board's move list
            moveCount = len(root.board.moves)
            if moveCount >= 22:
                rollouts = 3000
            elif moveCount >= 16:
                rollouts = 7500
            elif moveCount >= 12:
                rollouts = 10000
            else:
                rollouts = 12500
            rollouts *= brainPower

            # The search is invoked twice back to back; the second call
            # starts from the currentId returned by the first.
            # NOTE(review): possibly an accidental duplicate - confirm
            # before removing, since it changes the effective budget.
            for _ in range(2):
                nextComputerId, currentId = MCTS.MCTS(tree, currentId,
                                                      root.id, rollouts)

            # advance the root to the node picked by the search
            root = tree[nextComputerId]
        else:
            # advance the root to the randomly chosen node
            root = tree[randomMove(root)]

        if root.board.checkEnd():
            return (root.board.P1Score, root.board.P2Score)
Пример #3
0
    def game_loop_vs_random_player(self):
        """Run a game between a random player ("P1") and MCTS (everyone else).

        The board is printed after every move.  ``self`` is rebound to the
        board returned by ``make_move`` / ``best_move.board`` on each turn,
        and the loop stops once ``self.game.is_done_phase`` is truthy.
        """
        # print board
        print(self)

        # create MCTS instance
        mcts = MCTS()

        # game loop
        while True:

            player_to_play = self.game.player_turn
            random_player = RandomAzulPlayerPlus.RandomAzulPlayerPlus("P1")

            if player_to_play == "P1":
                random_player.set_board(self.game)
                pit_choice, tile_type, column_choice = random_player.random_action(
                )
                self = self.make_move(player_to_play, pit_choice, tile_type,
                                      column_choice)
                print(self)
            else:
                # otherwise MCTS plays
                best_move = mcts.search(self)
                try:
                    # make AI move here
                    self = best_move.board
                    print("MCTS played:", self.game.pit_choice,
                          self.game.color_choice, self.game.row_choice)
                except AttributeError:
                    # The search result carried no usable move (presumably
                    # because the game is over); keep the current board.
                    # Only AttributeError is swallowed so that unrelated
                    # failures and Ctrl-C are no longer hidden.
                    pass
                print(self)

            # check if the game is finished
            if self.game.is_done_phase:
                print("is done phase\n")
                break
Пример #4
0
    def evaluateLeaf(self, leaf, value, done, breadcrumbs):
        """Expand ``leaf`` unless the game is done and return ``(value, breadcrumbs)``.

        When ``done`` equals 0, ``value`` is replaced by the prediction from
        ``self.get_preds`` and one edge per allowed action is appended to
        ``leaf.edges``; otherwise the incoming ``value`` is returned as is.
        """
        if done != 0:
            # nothing to expand: hand the inputs straight back
            return ((value, breadcrumbs))

        value, probs, allowedActions = self.get_preds(leaf.state)

        # keep only the entries that belong to allowed actions
        probs = probs[allowedActions]

        for idx, action in enumerate(allowedActions):
            newState, _, _ = leaf.state.takeAction(action)
            if newState.id in self.mcts.tree:
                # state already known to the tree: reuse its node
                child = self.mcts.tree[newState.id]
            else:
                child = mc.Node(newState)
                self.mcts.addNode(child)

            leaf.edges.append((action, mc.Edge(leaf, child, probs[idx],
                                               action)))

        return ((value, breadcrumbs))
Пример #5
0
def playPacmans(gameGrid, maze, pacmans, gameStats, real):
    """Advance the pacmans by one step.

    With ``real`` truthy an ``MCTS.MCTS`` instance built from ``gameGrid``
    plays the step.  Otherwise every pacman is removed from its current maze
    cell, asked for its next position via ``move(gameGrid)``, placed on the
    new cell and has its ``cord`` updated.
    """
    if real:
        MCTS.MCTS(gameGrid).play()
        return

    for pacman in pacmans:
        # clear the cell the pacman currently occupies
        row, col = pacman.getCord()[0:2]
        maze[row][col].remove("pacman")

        # let the pacman pick its destination and occupy it
        destination = pacman.move(gameGrid)
        maze[destination[0]][destination[1]].place("pacman", gameStats)
        pacman.cord = destination
Пример #6
0
    def evaluate_leaf(self, leaf, value, over, backtrack):
        """Expand ``leaf`` unless the game is over and return ``(value, backtrack)``.

        When ``over`` equals 0, ``value`` is replaced by the prediction from
        ``self.get_predictions`` and one edge per allowed move is appended to
        ``leaf.edges``; otherwise the incoming ``value`` is returned as is.
        """
        if over != 0:
            return (value, backtrack)

        value, probabilities, allowed = self.get_predictions(leaf.state)

        # restrict the probabilities to the allowed moves, in the same order
        priors = [probabilities[move] for move in allowed]

        for move, prior in zip(allowed, priors):
            new_state, _, _ = leaf.state.make_move(move)
            if new_state.id in self.mcts.tree:
                # state already known to the tree: reuse its node
                child = self.mcts.tree[new_state.id]
            else:
                child = mc.Node(new_state)
                self.mcts.add_node(child)

            leaf.edges.append((move, mc.Edge(leaf, child, prior, move)))

        return (value, backtrack)
Пример #7
0
    def __init__(self,
                 agent_id,
                 char_index,
                 max_episode_length,
                 num_simulation,
                 max_rollout_steps,
                 c_init,
                 c_base,
                 recursive=False,
                 num_samples=1,
                 num_processes=1,
                 comm=None,
                 logging=False,
                 logging_graphs=False,
                 seed=None):
        """Store the agent configuration and build its simulator and planner.

        All arguments are stored on the instance under the same names.  A
        random seed in ``[0, 100]`` is drawn when ``seed`` is ``None``.  A
        ``VhGraphEnv`` with ``pomdp = True`` is created as ``self.sim_env``
        and an ``MCTS`` planner is built from it and the search parameters.

        Raises:
            Exception: if the ``MCTS(...)`` call yields ``None``.
        """
        # identity and mode flags
        self.agent_type = 'MCTS'
        self.verbose = False
        self.recursive = recursive

        # draw a seed only when the caller did not supply one
        self.seed = random.randint(0, 100) if seed is None else seed
        self.logging = logging
        self.logging_graphs = logging_graphs

        self.agent_id = agent_id
        self.char_index = char_index

        # private simulation environment, switched to POMDP mode
        sim_env = VhGraphEnv()
        self.sim_env = sim_env
        sim_env.pomdp = True
        self.belief = None

        # search parameters forwarded to the planner below
        self.max_episode_length = max_episode_length
        self.num_simulation = num_simulation
        self.max_rollout_steps = max_rollout_steps
        self.c_init = c_init
        self.c_base = c_base
        self.num_samples = num_samples
        self.num_processes = num_processes

        self.previous_belief_graph = None

        planner = MCTS(self.sim_env, self.agent_id, self.char_index,
                       self.max_episode_length, self.num_simulation,
                       self.max_rollout_steps, self.c_init, self.c_base)
        self.mcts = planner
        if planner is None:
            raise Exception

        # Indicates whether there is a unity simulation
        self.comm = comm
Пример #8
0
 def run(self, mcts_sim, games):
     """Play ``games`` self-play games, collecting training data after each.

     Every game starts with ``mcts_sim`` simulations per move and raises
     that number by ``self.sim_increment`` after each move.  After a game
     the visited nodes are turned into training cases, ``self.train()`` is
     called, and the network / replay buffer are periodically saved and
     trimmed.  Returns ``None``.
     """
     for i in range(games):
         print("Game number", i + 1)
         best_path = []
         mcts_current = MCTS(self.hex_state,
                             anet=self.anet,
                             verbose=self.verbose)
         game_sim = mcts_sim

         # play until the root state reports the game as finished
         while not mcts_current.root_node.state.check_finished():
             next_node = mcts_current.run(game_sim)
             best_path.append(next_node)
             # restart the search from the state that was just reached
             mcts_current = MCTS(next_node.state,
                                 anet=self.anet,
                                 verbose=self.verbose)
             game_sim += self.sim_increment

         winner = mcts_current.root_node.state.player % 2 + 1
         if winner == 1:
             self.p1_wins += 1
         else:
             self.p2_wins += 1
         print("Player", winner, "won!!")

         # one training case per move, taken from the parent of each node
         for node in best_path:
             parent = node.parent
             label = create_distribution(parent)
             board = parent.state.Hex_to_list()
             self.add_data(parent.state.list_to_net(board), label)
         self.train()

         # periodic checkpoint (never on the very first game)
         if i % self.save_int == 0 and i != 0:
             if self.preload:
                 for case in self.buffer:
                     self.add_data_to_file("RBUF.txt", case[0], case[1])
             self.anet.save_model(self.file_add + str(i))

         # periodic trimming: drop the first 500 buffered cases
         if i % self.buffer_clear == 0 and i != 0 and len(self.buffer) > 500:
             self.buffer = self.buffer[500:]

         self.hex_state.player = self.hex_state.change_player()
Пример #9
0
 def __init__(self, master=None, height=0, width=0):
     """Create the board canvas, draw the board and set up both AI players.

     ``height`` and ``width`` are forwarded to ``Tkinter.Canvas``; the
     instance attributes of the same names are then set to 15.
     """
     Tkinter.Canvas.__init__(self, master, height=height, width=width)
     # initialise the step recorder
     self.step_record_chess_board = Record.Step_Record_Chess_Board()
     self.height = self.width = 15
     self.init_chess_board_points()  # draw the points
     self.init_chess_board_canvas()  # draw the board
     self.board = MCTS.Board()

     # search settings
     self.n_in_row = 5
     self.n_playout = 400  # num of simulations for each move
     self.c_puct = 5

     # Important: both AI players receive the very same self.board object
     # (Python passes object references), so the board will be modified by
     # other operations.
     self.AI = MCTS.MonteCarlo(self.board, 1)
     self.AI_1 = MCTS.MonteCarlo(self.board, 0)

     # interaction state
     self.clicked = 1
     self.init = True  # first place is given by user (later need to be replaced as a random selection)
     self.train_or_play = True  # True - train, False - play
     self.step = 0
     self.text_id = None
Пример #10
0
def playing(model):
    """Play one game with MCTS guided by ``model`` and return the max tile value.

    Each step asks ``MCTS.mcts_process`` for an event, applies it to the
    grid and prints the step number and every matrix row.  The loop ends
    when ``gamegrid.is_over`` is truthy; ``gamegrid.max_value`` is returned.
    """
    gamegrid = puzzle.GameGrid()
    step = 0
    while True:
        # the first element of the result is not used here
        _, event = MCTS.mcts_process(gamegrid.matrix, model)
        gamegrid.action(event)
        print("step: ", step)
        step += 1
        for row in gamegrid.matrix:
            print(row, event)
        if gamegrid.is_over:
            return gamegrid.max_value
Пример #11
0
def AI_move():
    """Let the white bot make one move and hand the turn to black.

    Does nothing when ``SIDE`` is truthy.  Otherwise asks ``MCTS.MCT_step``
    for a move, prints it with its timing, updates the board matrix, the
    ``line_count`` counters, the piece lists and the canvas, removes a
    captured black piece if there is one, and finally sets ``SIDE`` to 1 and
    moves the turn marker to the left side.
    """
    global total_time_cost, SIDE, left_side_mark, black_piece_count
    if SIDE:
        # not the white bot's turn
        return

    started = time.time()
    ret = MCTS.MCT_step(board_situation, black_piece, white_piece,
                        line_count, black_piece_count, white_piece_count)
    time_cost = round(time.time() - started, 4)
    total_time_cost += time_cost
    print(ret)
    print('totally cost = ', time_cost, 's')
    if time_cost > 60:
        print("black win")

    # ret[0] is the piece's current square, ret[1] the destination
    source = ret[0]
    move_piece_index = white_piece.index(source)
    oldx = source[0]
    oldy = source[1]
    target = ret[1]
    posx = target[0]
    posy = target[1]

    # update board situation
    board_situation[oldx][oldy] = 0
    board_situation[posx][posy] = 1
    # the vacated square leaves its lines
    # NOTE(review): the offsets presumably select column / row / diagonal
    # counters - confirm against the line_count layout.
    line_count[oldx + 7] -= 1
    line_count[oldy - 1] -= 1
    line_count[oldx + oldy + 13] -= 1
    if oldx != 8 or oldy != 1:
        line_count[oldx - oldy + 35] -= 1

    # update piece info
    white_piece[move_piece_index] = [posx, posy]
    # update canvas
    board.coords(white_in_canvas[move_piece_index], posx * 30 + 4,
                 (9 - posy) * 30 + 4, posx * 30 + 26, (9 - posy) * 30 + 26)

    landing = [posx, posy]
    if landing in black_piece:
        # a black piece is captured: move it off the canvas and mark it out
        captured = black_piece.index(landing)
        board.coords(black_in_canvas[captured], 301, 301, 301, 301)
        black_piece[captured] = [114, 114]
        black_piece_count -= 1
    else:
        # no capture: the destination square joins its lines
        line_count[posx + 7] += 1
        line_count[posy - 1] += 1
        line_count[posx + posy + 13] += 1
        if posx != 8 or posy != 1:
            line_count[posx - posy + 35] += 1

    # change side to black
    SIDE = 1
    right_side.delete(right_side_mark)
    left_side_mark = left_side.create_oval(39, 139, 61, 161, fill="black")
Пример #12
0
def deal_request():
    """Handle a request: run the MCTS route search (GET) or echo ``q`` (POST).

    GET builds a fixed initial state, runs ``mcts_manager.search`` and
    returns the best route as a string.  POST renders ``result.html`` with
    the submitted form field ``q``.  Any other method returns ``None``.
    """
    if request.method == "GET":
        # GET parameters are read with request.args.get("param_name", "")

        import sys
        # Register the AI directory only once; appending it on every request
        # made sys.path grow without bound.
        if "../AI/" not in sys.path:
            sys.path.append("../AI/")
        import MCTS
        mcts_manager = MCTS.mcts(timeLimit=10000)  # start the MCTS program
        print("初始化中...")
        from datetime import datetime

        start_datetime = datetime(2021, 2, 1, 9, 30, 0)
        end_datetime = datetime(2021, 2, 1, 20, 30, 0)
        initialstate = MCTS.State(nowspotname="横滨港未来21",
                                  travelpoint=0,
                                  totaltravelpoint=0,
                                  moneycost=0,
                                  onfoottime=0,
                                  now_datetime=start_datetime,
                                  end_datetime=end_datetime,
                                  hasbeenspots=[])
        print("初始化sucess")

        root = mcts_manager.search(initialState=initialstate)
        result = mcts_manager.getBestRoute(root)

        return str(result)

    elif request.method == "POST":
        # POST parameters are read with request.form["param_name"]
        post_q = request.form["q"]
        return render_template("result.html", result=post_q)
Пример #13
0
 def getAction(self, simulator):
     """Run a fresh MCTS on ``simulator`` and return the most probable action.

     The policy returned by the search is stored in ``self.act_pro_pair``.
     The action with the highest probability wins; an action whose
     probability equals the current best replaces it with a coin flip.
     ``(-1, -1)`` is returned when no action has a probability above 0.
     """
     search = MCTS.MCTS(C=5)
     search.run(self.numOfiterations,
                simulator,
                self.network,
                rolloutFn=self.rollout,
                balance=self.balance)
     self.act_pro_pair = search.getPolicy()

     best_prob = 0
     chosen = (-1, -1)
     for candidate, prob in self.act_pro_pair.items():
         if prob > best_prob:
             best_prob, chosen = prob, candidate
         elif prob == best_prob and np.random.random() > 0.5:
             # tie: switch to the newcomer half of the time
             chosen = candidate
     return chosen
Пример #14
0
def playing(cpuct, times):
    """Play one game with MCTS and return the grid's ``max_value``.

    ``cpuct`` and ``times`` are forwarded to ``MCTS.mcts_process`` as
    ``cpuct`` and ``update_times``.  The loop ends as soon as
    ``gamegrid.is_over`` is truthy.
    """
    gamegrid = puzzle.GameGrid()
    while True:
        event = MCTS.mcts_process(gamegrid.matrix,
                                  cpuct=cpuct,
                                  update_times=times)
        gamegrid.action(event)
        # The step counter and the per-row loop that only fed disabled debug
        # prints were dead code and have been removed.
        if gamegrid.is_over:
            return gamegrid.max_value
Пример #15
0
def select_action(input_state,
                  input_hidden_state,
                  actor_model,
                  critic_model=None,
                  teacher_model=None,
                  K=1,
                  use_MLE=False,
                  MCTS_thresh=0):
    """Apply the actor to a state and pick an action.

    The action is chosen by maximum likelihood (``use_MLE``), by
    ``MCTS.UCT_search`` when the largest action probability is below
    ``MCTS_thresh``, or otherwise by sampling from the actor's distribution.

    Side effects: the log-probability of the chosen action is appended to
    ``actor_model.saved_action_values``; when a critic is given its state
    value is appended to ``critic_model.saved_state_values``.

    Args:
        input_state: state passed to the actor / critic / teacher models.
        input_hidden_state: hidden state passed alongside ``input_state``.
        actor_model: callable returning ``(probs, hidden_state)``.
        critic_model: optional callable returning a state value.
        teacher_model: optional callable used for policy distillation.
        K: extra argument forwarded to actor and teacher for distillation.
        use_MLE: pick the most probable action instead of sampling.
        MCTS_thresh: probability threshold below which MCTS is used.

    Returns:
        ``(action, hidden_state, kl)`` where ``kl`` is the distillation
        error as a Python number when ``teacher_model`` is given, else
        ``None``.
    """
    probs, hidden_state = actor_model(input_state, input_hidden_state)
    m = Categorical(probs)

    # Use MLE instead of sampling distribution
    if use_MLE:
        _, topi = probs.data.topk(1)
        action = topi.squeeze()

    # Note: MCTS only works during validation (when the model is not tracking gradients)
    elif torch.max(probs).detach() < MCTS_thresh:
        action, hidden_state, _ = MCTS.UCT_search(env, input_state,
                                                  input_hidden_state,
                                                  actor_model, critic_model, 5,
                                                  env.action_space, 100)
        action = torch.tensor(action, device=config.DEVICE)

    else:
        action = m.sample()

    actor_model.saved_action_values.append(m.log_prob(action))

    # Identity comparison with None: never dispatches to a custom __ne__.
    if critic_model is not None:
        state_value = critic_model(input_state, input_hidden_state)
        critic_model.saved_state_values.append(state_value)

    if teacher_model is not None:
        # Add policy distillation error
        actor_probs, _ = actor_model(input_state, input_hidden_state, K)
        supervised_probs, _ = teacher_model(input_state, input_hidden_state, K)
        KL_error = utils.KL_divergence(actor_probs, supervised_probs, K)
        return action, hidden_state, KL_error.item()

    return action, hidden_state, None
Пример #16
0
def MTCS_player():
    """Play ten games with MCTS and print each game's and the overall max value.

    The required source files are copied first so that ``MCTS`` can be
    imported.  Each game runs until ``gamegrid.is_over`` is truthy; the
    game window is destroyed afterwards.
    """
    for needed in ("MCTS.py", "constants.py", "puzzle.py"):
        copy_files(needed)
    import MCTS

    value = []
    for i in range(10):
        gamegrid = GameGrid()
        # Truthiness test instead of ``== False``.
        # NOTE(review): assumes is_over is a plain boolean - confirm.
        while not gamegrid.is_over:
            event = MCTS.mcts_process(gamegrid.matrix)
            gamegrid.action(event)
            gamegrid.update_grid_cells()
        print("%dth step's max value is %d" % (i, gamegrid.max_value))
        value.append(gamegrid.max_value)

        gamegrid.windows.destroy()
    print("*" * 50)
    print("the max value is ", max(value))
Пример #17
0
 def __init__(self,
              numOfiterations,
              network,
              path,
              eta=1.0,
              decay=0.85,
              rollout=None,
              balance=0):
     """Store the configuration and create the search helpers.

     ``network`` is wrapped in ``ExpandingFn`` and an ``MCTS.MCTS`` instance
     is created with ``eta``; the remaining arguments are stored unchanged.
     """
     # configuration supplied by the caller
     self.numOfiterations = numOfiterations
     self.eta = eta
     self.decay = decay
     self.balance = balance
     self.rollout = rollout
     self.path = path

     # collaborators
     self.network = ExpandingFn(network)
     self.mcts = MCTS.MCTS(eta=self.eta)

     # bookkeeping that starts out empty
     self.datalist = []
     self.finalDataList = []
     self.isFinished = 0
Пример #18
0
def create_game_data(new_game):
    """Play ``new_game`` to its end with MCTS and collect position copies.

    After every move that does not end the game, a deep copy produced by
    ``new_game.game_deep_copy`` is appended to the result list.

    Returns:
        ``(copies, new_game)`` - the collected copies and the mutated game.
    """
    snapshots = []

    while not new_game.end:
        search = MCTS.TreeSearch(new_game)
        search.search_tree(15)
        chosen = search.get_best_move().game

        # copy the chosen position into the running game
        new_game.board = chosen.board
        new_game.end = chosen.end
        new_game.winner = chosen.winner

        if new_game.end:
            # finished: the turn counter advances one past the chosen node
            new_game.turn = chosen.turn + 1
        else:
            new_game.turn = chosen.turn
            snapshots.append(
                new_game.game_deep_copy(new_game, new_game.color))

    return snapshots, new_game
Пример #19
0
def main():
    """Play an interactive Tic-Tac-Toe game: MCTS against a human.

    MCTS moves on even move counts, the human is prompted for a row and a
    column on odd move counts.  Input that is not an integer, or a move the
    board rejects, simply leads to a new prompt.  The board and the node
    statistics are printed after every move, and the result at the end.
    """
    BoardObj = TicTacToe()
    currNode = Node(expanded=False,
                    visited=True,
                    TotalSimualtionReward=0,
                    totalNumVisit=1,
                    TicTacToe=BoardObj,
                    parent=None)
    print("Initial Board setting")
    currNode.TicTacToe.print_board()
    while not currNode.Terminal:
        if currNode.TicTacToe.moveCnt & 1:
            try:
                x = int(raw_input('Enter row position\n'))
                y = int(raw_input('Enter column position\n'))
            except ValueError:
                # Non-numeric input used to crash the game; ask again.
                continue
            # Try the move on a copy so a rejected move leaves no trace.
            TicTacToeObj = cp.deepcopy(currNode.TicTacToe)
            try:
                TicTacToeObj.make_move(x, y)
            except Exception:
                # Move rejected by the board: ask again.  Unlike the former
                # bare ``except`` this lets Ctrl-C / SystemExit through.
                continue
            # Reuse a matching existing node when compareTo finds one.
            nextNode = currNode.compareTo(TicTacToeObj.board)
            if nextNode is None:
                nextNode = Node(expanded=False,
                                visited=True,
                                TotalSimualtionReward=0,
                                totalNumVisit=1,
                                TicTacToe=TicTacToeObj,
                                parent=None)
        else:
            nextNode = MCTS.MonteCarloTreeSearch(currNode, 0.1)
        print("After {} Move".format(nextNode.TicTacToe.moveCnt))
        print(nextNode.TotalSimualtionReward)
        print(nextNode.TotalNumVisit)
        nextNode.TicTacToe.print_board()
        currNode = nextNode
    if currNode.TicTacToe.draw:
        print("Match is Drawn")
    else:
        if currNode.TicTacToe.moveCnt & 1:
            print("First Player won")
        else:
            print("Second Player won")
Пример #20
0
    def generate_games(self,
                       episodes,
                       snapshots,
                       batch_size,
                       sim_time=0,
                       rollouts_per_move=0,
                       generate_random=False):
        """Generate ``episodes`` self-play games and train the actor after each.

        The actor is saved once up front (suffix ``"0"``) and again after
        every episode whose number is in ``snapshots``.  Each move adds a
        ``(board, distribution)`` pair to ``self.replay_buffer``.  With
        ``generate_random`` truthy, ``actors.Random`` drives the search
        instead of the actor.  Returns ``None``.
        """
        print("Net will be cahced after the following episodes:", snapshots)
        self.actor.save(self.name + "0")

        generator = (actors.Random(self.state_manager)
                     if generate_random else self.actor)

        for episode in range(1, episodes + 1):
            board = self.state_manager.get_start()
            MC = MCTS.MonteCarlo(self.start_player, self.state_manager,
                                 generator)
            game_over = False
            while not game_over:
                # search for sim_time seconds / rollouts_per_move rollouts
                MC.search(sim_time=sim_time, simulations=rollouts_per_move)
                self.replay_buffer.append((board, MC.get_move_distribution()))

                # advance to the state the search prefers
                board, move = MC.best_move()
                winner = self.state_manager.winner(board)
                if self.verbose:
                    self.state_manager.print_move(move)
                    self.state_manager.print_board(board)
                    print()

                # make the new board the root of the search tree
                MC.purge_tree(board)
                game_over = winner != 0

            self.actor.train_network_random_minibatch(self.replay_buffer,
                                                      batch_size=batch_size)

            if episode in snapshots:
                self.actor.save(self.name + str(episode))
Пример #21
0
    def __init__(self, auto):
        """Set up the players, the initial 8x8 board and the game bookkeeping.

        With ``auto`` truthy player one is the SL policy network loaded from
        ``./models/sl_model.npz``; otherwise player one is the human and
        ``self.model`` is ``None``.  Player two is always the PV-MCTS player.
        """
        # players
        if not auto:
            self.p1 = "You"
            self.model = None
        else:
            self.p1 = "IaGo(SLPolicy)"
            self.model = network.SLPolicy()
            serializers.load_npz('./models/sl_model.npz', self.model)
        self.p2 = "IaGo(PV-MCTS)"

        # board: empty except for the four centre squares
        board = np.zeros([8, 8], dtype=np.float32)
        board[4, 3] = board[3, 4] = 1
        board[3, 3] = board[4, 4] = 2
        self.state = board

        # game variables
        self.stone_num = 4
        self.play_num = 1
        self.pass_flg = False
        self.date = datetime.now().strftime("%Y-%m-%d-%H-%M")
        self.gamelog = "IaGo \n" + self.date + "\n"
        self.mcts = MCTS.MCTS()
Пример #22
0
 def brain(self, board, opponent):
     """Choose an action for ``board`` with the strategy named by ``self.level``.

     Supported levels are ``'random'``, ``'minmax'``, ``'minimax_alphabeta'``
     and ``'MCTS3s'``; any other value uses ``MCTS_selection.MCTS``.
     Returns the chosen action, or ``[9, 9]`` when the strategy yields
     ``None``.
     """
     level = self.level
     if level == 'random':
         outcome = self.randomchoice(board)
     elif level == 'minmax':
         outcome = self.minimax(board, opponent)
     elif level == 'minimax_alphabeta':
         outcome = self.minimax_alpha_beta(board, opponent)
     elif level == 'MCTS3s':
         outcome = MCTS.MCTS(board, opponent).get_action()
     else:
         outcome = MCTS_selection.MCTS(board, opponent).get_action()

     # every strategy returns a pair whose second element is the action
     _, action = outcome
     return [9, 9] if action is None else action
Пример #23
0
    def play_game(self, net):
        """Play a complete self-play game with ``net`` and return its turns.

        Each turn is a tuple ``(input, policy, result)``: the network input
        reshaped to ``(1, 9, 9, F)``, the MCTS policy reshaped to
        ``(1, 81)`` and a ``(1, 1)`` array.  Once the game is over the result
        arrays are filled in from ``self.state`` (0 is treated as a draw),
        alternating between consecutive turns for states 1 and 2.
        """
        self.init_game()
        turns = []
        planes = 2 * self.NUMBER_OF_SAVED_GAME_STATES + 1 if self.state == -1 else None
        while self.state == -1:
            net_input = self.get_convnet_input().reshape((1, 9, 9, planes))
            action, policy = MCTS.run_mcts(self, net)
            turns.append((net_input, policy.reshape((1, 81)),
                          np.zeros((1, 1))))
            self.move(self.cnn_action_to_coords(action))

        # write the outcome into every turn, alternating between players
        outcome = self.state
        for i, (_, _, result) in enumerate(turns):
            if outcome == 0:
                # draw
                result[0, 0] = 0.5
            elif outcome == 1:
                result[0, 0] = 1 - (i % 2)
            elif outcome == 2:
                result[0, 0] = 1 - ((i + 1) % 2)
        return turns
Пример #24
0
    def do_POST(self):
        """Answer a form-encoded POST with the move suggested by MCTS.

        The form must contain ``board``; ``wait_time`` (seconds per move)
        is optional and defaults to 2.  Error 415 is sent for any other
        content type and for a missing ``board`` value.  On success the
        response is JSON of the form ``{"move": ...}``.
        """
        ctype, pdict = cgi.parse_header(self.headers['content-type'])

        if ctype != 'application/x-www-form-urlencoded':
            self.send_error(
                415,
                "Only application/x-www-form-urlencoded data is supported.")
            return

        length = int(self.headers.getheader('content-length'))
        data = cgi.parse_qs(self.rfile.read(length), keep_blank_values=1)

        if "board" not in data:
            self.send_error(415, "No value named 'board'.")
            return
        board, player = Othello.str_to_board(data["board"][0])
        game = Othello(board=board, player=player)

        # seconds the search may spend on the move
        wait_time = int(data["wait_time"][0]) if "wait_time" in data else 2

        mcts = MCTS.MCTS(prior_prob=RandomNetwork(),
                         rollout_policy=RandomNetwork(),
                         seconds_per_move=wait_time)
        return_data = json.dumps({'move': mcts.suggest_move(game)})

        self.send_response(200)
        self.send_header('Content-type', 'application/json')
        self.send_header('Access-Control-Allow-Origin', '*')
        self.end_headers()

        self.wfile.write(return_data)
Пример #25
0
    def reset(self,
              observed_graph,
              gt_graph,
              task_goal,
              seed=0,
              simulator_type='python',
              is_alice=False):
        """Reset belief, simulation environment and planner for a new task.

        A new ``belief.Belief`` is built from ``gt_graph`` and sampled, the
        simulation environment is reset with a belief graph sampled from
        ``observed_graph`` and ``task_goal``, and a new ``MCTS`` planner is
        created with ``seed``.  ``simulator_type`` and ``is_alice`` are
        accepted but not used in this method.
        """
        self.last_action = None
        self.last_subgoal = None
        # TODO: do no need this?

        self.previous_belief_graph = None
        self.belief = belief.Belief(gt_graph,
                                    agent_id=self.agent_id,
                                    seed=seed)
        self.belief.sample_from_belief()
        graph_belief = self.sample_belief(observed_graph)
        try:
            self.sim_env.reset(graph_belief, task_goal)
        except Exception:
            # Debugging hook kept from the original: drop into ipdb when the
            # environment reset fails.  ``except Exception`` replaces the
            # bare ``except`` so Ctrl-C / SystemExit are no longer trapped.
            import ipdb

            ipdb.set_trace()
        self.sim_env.to_pomdp()
        self.mcts = MCTS(self.sim_env,
                         self.agent_id,
                         self.char_index,
                         self.max_episode_length,
                         self.num_simulation,
                         self.max_rollout_steps,
                         self.c_init,
                         self.c_base,
                         seed=seed)
Пример #26
0
    def tick(self):
        """Advance the commander by one tick.

        On the first tick the positions of the available bots are passed to
        the game state.  Whenever exactly four bots are available, a UCT
        search picks the moves, which are applied to the game state and
        issued to the bots.  The iteration counter is incremented and the
        window is marked dirty and updated on every tick.
        """
        # At the start of the tick we update the state so the MCTS is
        # aware of the current bot positions.
        if self.initialise_bots:
            bot_positions = [bot.position for bot in self.game.bots_available]
            self.game_state.SetInitialPositions(bot_positions)
            self.initialise_bots = False

        if len(self.game.bots_available) == 4:
            m = MCTS.UCT(rootstate=self.game_state,
                         itermax=10000,
                         verbose=False)
            # Once we have the best move we issue it to any bots that are available.
            self.game_state.DoMoves(m)
            self.IssueMoves(m)

        # Number of explored corridor nodes; currently only of use for
        # debugging output, kept so the lookups behave as before.
        total = sum(1 for node in self.game_state.corridor_graph.nodes()
                    if self.game_state.explored[node])
        self.iterations += 1

        self.window.dirty = True
        self.window.update()
Пример #27
0
 def buildMCTS(self, state):
     """Start a new search tree rooted at ``state``.

     Logs the rebuild, then stores the new root node in ``self.root`` and
     an ``mc.MCTS`` built from it and ``self.cpuct`` in ``self.mcts``.
     """
     lg.logger_mcts.info(
         '****** BUILDING NEW MCTS TREE FOR AGENT %s ******', self.name)
     root = mc.Node(state)
     self.root = root
     self.mcts = mc.MCTS(root, self.cpuct)
Пример #28
0

def human(player, board):
    """Prompt the human player until a legal move is entered and return it.

    The move is typed as two integers ``"x y"`` and converted to the board
    index ``x + y * 10 + 11``.  Input that is not exactly two integers is
    reported as illegal input; a move rejected by ``check`` is reported as
    an illegal move.  In both cases the prompt is shown again.
    """
    print("Note: 'x' denotes black disks, 'o' denotes white disks.")
    print("After computer's response, the board becomes:")
    print(Othello.print_board(board))
    while True:
        try:
            pos = [int(i) for i in raw_input('your move in "x y"> ').split()]
        except ValueError:
            # Non-numeric tokens used to crash the game; treat them like any
            # other malformed input.
            print('Illegal input--try again.')
            continue
        if len(pos) == 2:
            move = pos[0] + pos[1] * 10 + 11
            if move and check(int(move), player, board):
                return int(move)
            elif move:
                print('Illegal move--try again.')
        else:
            print('Illegal input--try again.')


if __name__ == '__main__':
    # Script entry point: a human plays against the random-network MCTS.
    # The policy-network MCTS is constructed as well but not passed to the
    # game below.
    policy_weights = "./model/policy_model_L_conv5*128_conv3*128*4_20.h5"

    model_human = human
    model_MCTS_random = MCTS.MCTS(prior_prob=RandomNetwork(),
                                  seconds_per_move=5,
                                  rollout_policy=RandomNetwork())
    # the weights file is loaded twice, once per role
    model_MCTS_policy = MCTS.MCTS(prior_prob=PolicyNetwork(policy_weights),
                                  rollout_policy=PolicyNetwork(policy_weights),
                                  seconds_per_move=5)
    MCTS.play_with_MCTS(Othello(), model_human, model_MCTS_random)
Пример #29
0
    wins = 0
    losses = 0
    winrate = []

    for i in range(G):  # actual games
        if i == 0 and save_networks:
            nn_policy.model.save("networks/mcts" + str(size) + "_" + str(G) +
                                 "_" + str(M) + "_" + str(i))

        if i % (G / 10) == 0 and not verbose:
            print((i / G) * 100, "%", "done")

        mcts = MCTS(statemanager=stateman,
                    initial_state=game.__copy__(),
                    target_policy=nn_policy,
                    default_policy=nn_policy,
                    tree_policy=policy,
                    M=M)

        winner = play_game(mcts, nn_policy)
        if verbose:
            print("Winner:", winner)
        if i != G - 1:
            replay_buffer.clear()
        if winner == initial_player:
            wins += 1
        else:
            losses += 1
        winrate.append(wins / (i + 1))

        if ((i % (save_networks_interval) == 0 and i != 0)
Пример #30
0
                    ]
                    bar.next()
                bar.finish()
                print(
                    "[logger]: Saving dictionary of neighbors to speedup next time."
                )
                with open(
                        neighbors_prefix_path +
                        "neighbors_{}.pkl".format(test_pt), 'wb') as f:
                    pickle.dump(nearest_neighbors, f, pickle.HIGHEST_PROTOCOL)

            # 4. create the MCTree and instantiate the search
            branch_factor = len(input_text)
            MCTS.MCTree.__simulate_single_node = __override_simulate_single_node_torch  # override MCTS __simulate_single_node method
            MCTS.MCTree.simulate = __override_simulate  # override MCTS simulate method
            tree = MCTS.MCTree(branch_factor, max_depth, n_sims, l_rate,
                               discount)
            if np.argmax(model.predict(x)) != np.argmax(y):
                print(
                    "[logger-ERROR]: Prediction and true label are different: can't proceed in the analysis."
                )
                continue
            else:
                number_verified += 1
            y_hat = np.max(
                model.predict(x)
            )  # this is used for the 'gain' and hence for the MCTS-UCT heuristc
            true_label = np.argmax(model.predict(x))

            while tree.actual_depth != tree.max_depth:
                v_star = tree.select()
                print("Node selected {} (depth={}), whose parent is {}".format(