Example #1
    def rollout(self, node):
        # Play random moves from this node's position until someone wins
        # or the board fills up; return the winner (1/-1) or 0 for a draw.
        rollout_mat = np.copy(node.game_state)
        zeros = np.where(rollout_mat == 0)
        unvisited_node_list = list(zip(zeros[0], zeros[1]))
        random.shuffle(unvisited_node_list)

        board_full = len(unvisited_node_list) == 0
        if node.move is not None:
            have_winner = check_for_win(rollout_mat, node.move)
        else:
            have_winner = None
        cur_player = node.player

        while have_winner is None and not board_full:
            # Alternate players and play a random empty cell.
            cur_player *= -1
            move = unvisited_node_list.pop()
            rollout_mat[move[0]][move[1]] = cur_player

            board_full = len(unvisited_node_list) == 0
            have_winner = check_for_win(rollout_mat, move)

        # Check the winner first: the final move can both win and fill the board.
        if have_winner is not None:
            return have_winner
        return 0
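
All six snippets rely on a check_for_win helper that is never shown, and its contract varies between codebases: Examples #1-#4 call check_for_win(board, last_move) and expect the winner or None, Example #5 passes only a state, and Example #6 unpacks a tuple. Below is a minimal sketch of the two-argument variant, assuming a square numpy board with players encoded as 1/-1, empties as 0, and five in a row to win; all of those encodings are assumptions, not taken from the original helper.

import numpy as np

def check_for_win(board, last_move, win_length=5):
    # Sketch only: return the winner (1 or -1) if last_move completed a
    # run of win_length stones, else None. Board encoding and win rule
    # are assumed; the snippets never show the real implementation.
    if last_move is None:
        return None
    row, col = last_move
    player = board[row][col]
    size = board.shape[0]
    for dr, dc in ((0, 1), (1, 0), (1, 1), (1, -1)):  # 4 line directions
        count = 1
        for sign in (1, -1):  # walk both ways from the last stone
            r, c = row + sign * dr, col + sign * dc
            while 0 <= r < size and 0 <= c < size and board[r][c] == player:
                count += 1
                r += sign * dr
                c += sign * dc
        if count >= win_length:
            return player
    return None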
Example #2
    def policy_rollout(self, rollout_mat, move, player):
        # Random playout from the given position; note this mutates
        # rollout_mat in place rather than copying it.
        zeros = np.where(rollout_mat == 0)
        unvisited_node_list = list(zip(zeros[0], zeros[1]))
        random.shuffle(unvisited_node_list)

        board_full = len(unvisited_node_list) == 0
        if move is not None:
            have_winner = check_for_win(rollout_mat, move)
        else:
            have_winner = None
        cur_player = player

        while have_winner is None and not board_full:
            cur_player *= -1
            move = unvisited_node_list.pop()
            rollout_mat[move[0]][move[1]] = cur_player

            # Fast model rollout (alternative to the random move above)
            # X_input = self._rollout_encoder.encode(rollout_mat, cur_player, move)
            # position_priority = list(np.argsort(self._rollout_model.predict(X_input))[0][::-1])
            # for position in position_priority:
            #     i = position // 8
            #     j = position % 8
            #     if rollout_mat[i][j] == 0:
            #         break
            # rollout_mat[i][j] = cur_player

            board_full = len(unvisited_node_list) == 0
            have_winner = check_for_win(rollout_mat, move)

        # Check the winner first: the final move can both win and fill the board.
        if have_winner is not None:
            return have_winner
        return 0
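
The commented-out block sketches replacing the uniform-random move with one ranked by a small rollout policy network. Factored into a helper, that idea might look like the sketch below; the _rollout_encoder and _rollout_model calls and the 8x8 board size mirror the commented code, and everything else (the helper's name and return convention) is hypothetical.

def pick_rollout_move(self, rollout_mat, cur_player, last_move):
    # Sketch: rank cells by the rollout model's predicted priors and play
    # the best empty one. The encoder/model interfaces are assumptions
    # carried over from the commented-out code above.
    X_input = self._rollout_encoder.encode(rollout_mat, cur_player, last_move)
    position_priority = np.argsort(self._rollout_model.predict(X_input))[0][::-1]
    for position in position_priority:
        i, j = position // 8, position % 8
        if rollout_mat[i][j] == 0:
            return (i, j)
    return None  # board is full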
Example #3
    def select_move(self, mat, move):
        cur_time = time.time()

        root = MCTSNode(mat, self.cur_player, move=move)
        simulation_num = 0

        # Run simulations until a fixed 10-second budget is spent
        # (alternative: for i in range(self.simulation_number):)
        while time.time() - cur_time < 10:
            node = root

            # Step 1: Selection
            while node.move is not None and (
                    not node.can_add_child()) and check_for_win(
                        node.game_state, node.move) is None:
                node = self.select_child(node)

            # Step 2: Expansion
            if node.can_add_child():
                node = node.expansion()

            # Step 3: Rollout and get the winner
            winner = self.rollout(node)

            # Step 4: Back-propagate the result along the path to the root;
            # a draw (winner == 0) counts as half a win for every node.
            while node is not None:
                if node.player == winner:
                    node.win_num += 1
                if winner == 0:
                    node.win_num += 0.5
                node.visit_num += 1
                node = node.parent

            simulation_num += 1

        visited_num_mat = np.zeros((8, 8))
        winning_num_mat = np.zeros((8, 8))
        winning_percent_mat = np.zeros((8, 8))

        # Track the most-visited child, the usual "robust" move choice.
        best_move_mat = None
        best_move = None
        best_visit_num = -1.0
        for child in root.children:
            child_visit_num = child.visit_num
            move = child.move
            visited_num_mat[move[0]][move[1]] = child.visit_num
            winning_percent_mat[move[0]][move[1]] = np.round(
                child.winning_percent(), 3)
            winning_num_mat[move[0]][move[1]] = child.win_num

            if child_visit_num > best_visit_num:
                best_visit_num = child_visit_num
                best_move_mat = child.game_state
                best_move = child.move

        pprint_tree(root)
        return visited_num_mat, simulation_num
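
Step 1 depends on self.select_child, which is not part of the snippet. A standard UCT rule fits the win_num/visit_num bookkeeping updated in Step 4; here is a minimal sketch under that assumption, with an exploration constant c whose value is a common default rather than anything from the original.

import math

def select_child(self, node, c=1.41):
    # UCT: child win rate plus an exploration bonus that shrinks as the
    # child accumulates visits. Assumes every child has visit_num >= 1,
    # which holds here because each expansion is immediately backpropagated.
    log_parent = math.log(node.visit_num)
    return max(
        node.children,
        key=lambda child: child.win_num / child.visit_num
        + c * math.sqrt(log_parent / child.visit_num))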
Example #4
    def select_move(self, mat, move):
        time_start = time.time()
        self.root = AlphaGomokuNode(mat, self.cur_player, move=move)

        for simulation in range(self.simulation_number):
            current_state = mat
            node = self.root

            # Descend up to self.depth plies; expand a leaf with the policy
            # network's moves and priors unless the leaf is already terminal.
            for depth in range(self.depth):
                if not node.children:
                    if check_for_win(node.game_state, node.move) is not None:
                        break
                    moves, probabilities = self.policy_probabilities(
                        current_state, move)
                    node.expand_children(moves, probabilities)

                move, node = node.select_child()
                current_state = np.copy(node.game_state)
                current_state[move[0]][move[1]] = node.player * -1

            # Evaluate the leaf with the value network and normalize the
            # sign to a fixed player's perspective.
            current_state_input = self._deep_encoder.encode(
                current_state, node.player, move)
            value = self._value_model.predict(current_state_input)[0][0]

            if node.player == -1:
                value *= -1

            # AlphaGo-style leaf evaluation: blend the value-network estimate
            # with a fast rollout outcome, weighted by lambda_value.
            rollout = self.policy_rollout(current_state, move, node.player)
            weighted_value = (
                1 - self.lambda_value) * value + self.lambda_value * rollout
            node.update_values(weighted_value)

        # Play the most-visited child move on the board.
        move = max(self.root.children,
                   key=lambda move: self.root.children.get(move).visit_count)
        mat[move[0]][move[1]] = self.cur_player

        pprint_tree(self.root)
        print("Total time spent", time.time() - time_start)

        return mat, move
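
The weighted_value line is the AlphaGo leaf evaluation V(s) = (1 - lambda) * v(s) + lambda * z, mixing the value network's estimate with the rollout outcome. The snippet's expand_children and select_child are not shown; in that scheme, selection typically uses the stored policy prior as an exploration term (PUCT). A sketch under that assumption follows; visit_count appears in the snippet, while prior, q_value, and c_puct are hypothetical names.

import math

def select_child(self):
    # PUCT-style selection over a dict mapping move -> child node.
    # Returns (move, node), matching `move, node = node.select_child()`.
    total_visits = sum(c.visit_count for c in self.children.values())
    c_puct = 5.0  # exploration constant (assumed value)

    def puct(item):
        _, child = item
        bonus = (c_puct * child.prior *
                 math.sqrt(total_visits) / (1 + child.visit_count))
        return child.q_value + bonus

    return max(self.children.items(), key=puct)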
Example #5
def best_child(node):
    # Return an immediately winning child if one exists; otherwise fall
    # back to the most-visited child (the robust choice).
    for child in node.children.values():
        if check_for_win(child.state) == -1:
            return child
    return max(node.children.values(), key=lambda x: x.visited_number)
Example #6
    def is_terminal(self):
        # The node is terminal once check_for_win reports a result for
        # the move that produced this position.
        result, _ = check_for_win(self.game_state, self.move)
        return result
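
For completeness, here is a minimal sketch of the node class that Examples #1 and #3 assume. The fields game_state, player, move, parent, children, win_num, visit_num and the methods can_add_child, expansion, winning_percent all appear in those snippets; the untried-move bookkeeping is an assumption about how they could be implemented.

import random
import numpy as np

class MCTSNode:
    def __init__(self, game_state, player, move=None, parent=None):
        self.game_state = game_state   # board matrix, 0 = empty cell
        self.player = player           # the player who made `move`
        self.move = move               # (row, col) that led to this state
        self.parent = parent
        self.children = []
        self.win_num = 0.0
        self.visit_num = 0
        zeros = np.where(game_state == 0)
        self.untried_moves = list(zip(zeros[0], zeros[1]))

    def can_add_child(self):
        return len(self.untried_moves) > 0

    def expansion(self):
        # Play one untried move for the opposing player and attach the child.
        move = self.untried_moves.pop(random.randrange(len(self.untried_moves)))
        new_state = np.copy(self.game_state)
        new_state[move[0]][move[1]] = self.player * -1
        child = MCTSNode(new_state, self.player * -1, move=move, parent=self)
        self.children.append(child)
        return child

    def winning_percent(self):
        return self.win_num / self.visit_num if self.visit_num else 0.0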