def action(self, board, trainer=None):
        """
        Choose this agent's next move and return the resulting board.

        The board is converted to FastBoard's array form, a game tree is
        searched from it, and the best node found is converted back into a
        board object. The literal 'V2' is passed as the last argument of the
        tree constructor.

        Args:
            board: current board object, as accepted by
                FastBoard.convert_board_to_array.
            trainer: optional RootStrapAB or TreeStrapMinimax instance.
                RootStrapAB searches with MinimaxWithPruning and
                TreeStrapMinimax with Minimax, both using ``trainer.weights``;
                the trainer's weights are updated from the searched tree
                afterwards. When omitted, a MinimaxWithPruning (alpha-beta)
                search with ``self.trained_weights`` is used.

        Returns:
            The board built by FastBoard.convert_array_to_board from the best
            node of the search.

        Raises:
            TypeError: if ``trainer`` is given but is neither a RootStrapAB
                nor a TreeStrapMinimax (this used to surface as an
                UnboundLocalError on ``minimax_tree``).
        """
        board_levels, all_worker_coords = FastBoard.convert_board_to_array(
            board)
        fast_board = FastBoard()

        # Pick the tree implementation and the weights it should evaluate with.
        if trainer is None:
            tree_cls, weights = MinimaxWithPruning, self.trained_weights
        elif isinstance(trainer, RootStrapAB):
            tree_cls, weights = MinimaxWithPruning, trainer.weights
        elif isinstance(trainer, TreeStrapMinimax):
            tree_cls, weights = Minimax, trainer.weights
        else:
            raise TypeError(
                f"Unsupported trainer type: {type(trainer).__name__}")

        minimax_tree = tree_cls(board_levels, all_worker_coords, self.name,
                                self.search_depth, fast_board, weights, 'V2')
        new_board_levels, new_worker_coords = minimax_tree.get_best_node()
        new_board = FastBoard.convert_array_to_board(
            board, new_board_levels, new_worker_coords)

        if trainer is not None:
            # Training mode: update the weights only after the tree has been
            # searched, since the trainer reads the searched tree.
            trainer.update_weights(minimax_tree)
        return new_board
示例#2
0
class Node():
    """
    Class representing a node in the MCTS.

    ``children`` is a dict keyed by child Node whose value is that child's
    board state (the layout produced by expand()).
    """
    def __init__(self, state, parent=None):
        """
        Args:
            state: board object for this node; must provide Player_turn().
            parent: the Node this one was expanded from, or None for a root.
        """
        self.state = state
        self.children = {}
        self.parent = parent
        self.visit_count = 0
        self.value_sum = 0
        self.to_play = state.Player_turn()
        self.fast_board = FastBoard()

    def add_children(self, children):
        """
        Register additional children on this node.

        Each item may be a Node (stored as-is) or a raw board state, which is
        wrapped in a new Node whose parent is this node. Entries are stored in
        the same ``{child_node: child_state}`` layout that expand() uses.
        """
        for child in children:
            if not isinstance(child, Node):
                child = Node(child, parent=self)
            # ``children`` is a dict, so entries are assigned by key; the
            # previous set-style ``.add`` call always raised AttributeError.
            self.children[child] = child.state

    def value(self):
        """
        Return the mean value of this node (value_sum / visit_count), or 0
        when the node has never been visited.
        """
        if self.visit_count == 0:
            return 0
        return self.value_sum / self.visit_count

    def is_expanded(self):
        """Return True once this node has at least one child."""
        return len(self.children) > 0

    def select_action(self, temperature):
        """
        Select a child node based on visit count and temperature.

        temperature == 0 picks the most visited child, temperature == np.inf
        picks uniformly at random, and any other value samples proportionally
        to ``visit_count ** (1 / temperature)``.
        """
        actions = list(self.children)
        visit_counts = np.array([child.visit_count for child in actions])
        if temperature == 0:
            new_state = actions[np.argmax(visit_counts)]
        elif temperature == np.inf:
            new_state = np.random.choice(actions)
        else:
            visit_count_distribution = visit_counts**(1 / temperature)
            visit_count_distribution = visit_count_distribution / sum(
                visit_count_distribution)
            new_state = np.random.choice(actions, p=visit_count_distribution)

        return new_state

    def select_child(self):
        """
        Selects the child with the highest UCB score.
        """
        children_nodes = list(self.children)
        # upper_confidence_bound is defined outside this class and is called
        # with the child node as its only argument.
        UCB_score = list(map(upper_confidence_bound, children_nodes))
        return children_nodes[np.argmax(UCB_score)]

    def expand(self):
        """
        Expand this node: generate every state reachable by the player to
        move and add one child Node per resulting board.
        """
        build, worker = self.fast_board.convert_board_to_array(self.state)
        children = self.fast_board.all_possible_next_states(
            build, worker, self.state.Player_turn())
        b_children = [
            self.fast_board.convert_array_to_board(self.state, i, j)
            for i, j in children
        ]
        for child in b_children:
            self.children[Node(child, parent=self)] = child
示例#3
0
class Trainer_CNN(Trainer):
    """
    Trainer/agent built around a value network.

    Training data comes from nodes of an MCTS tree (``self.mcts``); play is
    done by scoring every successor board with the network and picking the
    best one for the player to move.
    """
    def __init__(self, player, args, NN=None):
        """
        Args:
            player: name stored as ``self.name``.
            args: settings mapping; this class reads ``"Num_Simulations"``
                and ``"epochs"``.
            NN: optional value network. A fresh ValueFunc is created when
                omitted.
        """
        self.args = args
        self.state = Board(LinearRlAgentV2("A", 3), LinearRlAgentV2("B", 3))
        self.training_examples = []
        # Not assigned anywhere in this class; generate_training_data expects
        # it to hold a tree object exposing ``root``, ``breadth_run`` and
        # ``collapse`` before it is called.
        self.mcts = None
        self.nn = NN if NN is not None else ValueFunc()
        self.loss_array = []
        # Presumably (building level, worker owner) -> class index; not
        # referenced by any method of this class.
        self.mappings = {
            (0, None): 0,
            (1, None): 1,
            (2, None): 2,
            (3, None): 3,
            (4, None): 4,
            (0, 'A'): 5,
            (1, 'A'): 6,
            (2, 'A'): 7,
            (3, 'A'): 8,
            (0, 'B'): 9,
            (1, 'B'): 10,
            (2, 'B'): 11,
            (3, 'B'): 12,
        }
        self.nn.to(self.nn.device)
        self.name = player
        # NOTE(review): worker names are hard-coded to "A1"/"A2" regardless of
        # ``player`` - confirm this is intended.
        self.workers = [Worker([], str("A") + "1"), Worker([], str("A") + "2")]
        self.fast_board = FastBoard()

    def convertTo2D(self, board):
        """
        Takes in a board and converts it into a tensor made of two planes
        (shape (2, 5, 5) for a 5x5 board).

        Plane 0 holds each square's building level divided by 4. Plane 1
        holds 1 for a worker whose name starts with "A", -1 for any other
        worker and 0 for an empty square.
        """
        buildings = []
        players = []
        for row in board.board:
            level_row = []
            owner_row = []
            for square in row:
                # The building level is recorded regardless of occupancy.
                level_row.append(square.building_level / 4)
                if square.worker is None:
                    owner_row.append(0)
                elif square.worker.name[0] == "A":
                    owner_row.append(1)
                else:
                    owner_row.append(-1)
            buildings.append(level_row)
            players.append(owner_row)
        return torch.as_tensor([buildings, players])

    def convert_nodes_to_training_data(self, set_of_nodes):
        """
        Turn tree nodes into a shuffled list of (state, value) pairs.
        """
        training_data = [(i.state, i.value()) for i in set_of_nodes]
        shuffle(training_data)
        return training_data

    def generate_training_data(self):
        """
        Walk the MCTS tree and collect nodes for training.

        For ``args["Num_Simulations"]`` steps: run ``breadth_run`` from the
        current node, append everything ``collapse`` returns for the resulting
        root, then descend to that root's best child.

        Returns:
            list of the collected nodes.
        """
        print("\nGenerating Data")

        temp_MCTS = self.mcts
        node = self.mcts.root
        training_data = []
        # Earlier variant, kept for reference:
        #   for i in tqdm(range(self.args['Iterations'])):
        #       temp_MCTS.run(node.state.Player_turn())
        #   training_data = temp_MCTS.collapse()
        for _ in tqdm(range(self.args["Num_Simulations"])):
            root = temp_MCTS.breadth_run(node)
            training_data.extend(temp_MCTS.collapse(root))
            node = root.select_child()

        return training_data

    def save_checkpoint(self, folder):
        """
        Save the Neural Network's state dict to ``folder``/MCTS_AI_CNN,
        creating the folder (and any missing parents) first.
        """
        os.makedirs(folder, exist_ok=True)

        filepath = os.path.join(folder, "MCTS_AI_CNN")
        torch.save(self.nn.state_dict(), filepath)

    def learn(self, train_examples):
        """
        Learn using One MCTS tree.

        Performs one optimizer step per example and appends each loss to
        ``self.loss_array``, then calls ``self.plot_loss()`` (not defined in
        this class; presumably inherited from Trainer).

        Args:
            train_examples: sequence of (board state, target value) pairs.
        """
        print("\nLearning from Data")

        for example in train_examples:
            state, value = example[0], example[1]
            target = torch.tensor(value,
                                  dtype=torch.float32).to(self.nn.device)
            target = target.view(1)
            converted_state = self.convertTo2D(state)
            # Run this trainer's own network; the previous code called the
            # non-existent ``torch.nn.forward`` and an undefined name ``t``.
            pred = self.nn.forward(converted_state).to(self.nn.device)
            loss = self.nn.loss(pred, target)
            self.nn.optimizer.zero_grad()
            loss.backward()
            self.nn.optimizer.step()
            self.loss_array.append(loss.item())

        self.plot_loss()

    def train(self,
              checkpoint_folder=r'C:\Users\sarya\Desktop\Semester 4\ISM\Game'):
        """
        Run ``args["epochs"]`` rounds of data generation + learning, then
        save the network.

        Args:
            checkpoint_folder: directory the checkpoint is written to;
                defaults to the path that was previously hard-coded.
        """
        self.loss_array = []
        for _ in tqdm(range(self.args["epochs"])):
            training_examples = self.generate_training_data()
            training_examples = self.convert_nodes_to_training_data(
                training_examples)
            self.learn(training_examples)
        self.save_checkpoint(checkpoint_folder)

    def action(self, board):
        """
        Return the successor board the network rates best for the player to
        move: highest predicted value for "A", lowest for anyone else.
        """
        build, worker = self.fast_board.convert_board_to_array(board)
        pos_states = self.fast_board.all_possible_next_states(
            build, worker, board.Player_turn())
        b_pos_states = [
            self.fast_board.convert_array_to_board(board, i, j)
            for i, j in pos_states
        ]
        values = [
            torch.flatten(
                self.nn.forward(self.convertTo2D(state)).to(self.nn.device))
            for state in b_pos_states
        ]
        scores = torch.cat(values)
        if board.Player_turn() == "A":
            return b_pos_states[torch.argmax(scores).item()]
        return b_pos_states[torch.argmin(scores).item()]

    def place_workers(self, board):
        """
        Method to randomly place agent's workers on the board.

        Random coordinates in [0, 5) are drawn until both workers have been
        placed; any exception raised while placing a worker triggers another
        draw for that same worker.
        """
        place_count = 0
        while place_count < 2:
            try:
                coords = [np.random.randint(0, 5), np.random.randint(0, 5)]
                # Updates worker and square
                self.workers[place_count].update_location(coords)
                board.board[coords[0]][coords[1]].update_worker(
                    self.workers[place_count])
                place_count += 1
            except Exception:
                # Deliberate best-effort retry: a failed placement just
                # means "try different coordinates".
                continue
        return board
    def action(self, board, trainer=None):
        """
        Choose this agent's next move and return the resulting board.

        The board is converted to FastBoard's array form, a game tree is
        searched from it, and the best node found is converted back into a
        board object. The literal 'V1' is passed as the last argument of the
        tree constructor.

        Args:
            board: current board object, as accepted by
                FastBoard.convert_board_to_array.
            trainer: optional RootStrapAB or TreeStrapMinimax instance.
                RootStrapAB searches with MinimaxWithPruning and
                TreeStrapMinimax with Minimax, both using ``trainer.weights``
                at ``self.search_depth``; the trainer's weights are updated
                from the searched tree afterwards. When omitted, a
                MinimaxWithPruning (alpha-beta) search with
                ``self.trained_weights`` is used, and if
                ``self.adaptive_search`` is set the depth is increased when
                few moves are available.

        Returns:
            The board built by FastBoard.convert_array_to_board from the best
            node of the search.

        Raises:
            TypeError: if ``trainer`` is given but is neither a RootStrapAB
                nor a TreeStrapMinimax (this used to surface as an
                UnboundLocalError on ``minimax_tree``).
        """
        board_levels, all_worker_coords = FastBoard.convert_board_to_array(
            board)
        fast_board = FastBoard()
        search_depth = self.search_depth

        if trainer is None:
            # adaptive depth when not in training mode
            if self.adaptive_search:
                opponent = 'B' if self.name == 'A' else 'A'
                my_num_moves = len(
                    fast_board.all_possible_next_states(
                        board_levels, all_worker_coords, self.name))
                opp_num_moves = len(
                    fast_board.all_possible_next_states(
                        board_levels, all_worker_coords, opponent))
                total_moves = my_num_moves + opp_num_moves
                # Side considered for the extra ply: this agent when the
                # base depth is even, the opponent otherwise.
                if self.search_depth % 2 == 0:
                    next_search = self.name
                else:
                    next_search = opponent

                # The fewer moves available, the deeper the search goes.
                if total_moves < 20:
                    search_depth = self.search_depth + 3
                elif total_moves < 30:
                    search_depth = self.search_depth + 2
                elif total_moves < 40:
                    search_depth = self.search_depth + 1
                elif (my_num_moves < 20 and next_search == self.name) or (
                        opp_num_moves < 20 and next_search == opponent):
                    search_depth = self.search_depth + 1
                print(
                    f'Search Depth is {search_depth}, my moves = {my_num_moves}, opp moves = {opp_num_moves}'
                )
            tree_cls, weights = MinimaxWithPruning, self.trained_weights
        elif isinstance(trainer, RootStrapAB):
            tree_cls, weights = MinimaxWithPruning, trainer.weights
        elif isinstance(trainer, TreeStrapMinimax):
            tree_cls, weights = Minimax, trainer.weights
        else:
            raise TypeError(
                f"Unsupported trainer type: {type(trainer).__name__}")

        minimax_tree = tree_cls(board_levels, all_worker_coords, self.name,
                                search_depth, fast_board, weights, 'V1')
        new_board_levels, new_worker_coords = minimax_tree.get_best_node()
        new_board = FastBoard.convert_array_to_board(
            board, new_board_levels, new_worker_coords)

        if trainer is not None:
            # Training mode: update the weights only after the tree has been
            # searched, since the trainer reads the searched tree.
            trainer.update_weights(minimax_tree)
        return new_board