Example #1
0
    def play_round(self,
                   num_reads: int) -> Tuple[Optional[str], List[np.ndarray]]:
        """
        Pit the current network against the best network in a single game.

        A coin flip decides which network plays white. Moves are sampled
        from an MCTS-derived policy until somebody wins or the board is
        no longer playable.

        @param num_reads: number of MCTS simulations per move (see args)
        @return: tuple of (winner label or None on a draw, list of encoded
                 board states with a trailing human-readable result string)
        """
        print("Starting game round...")
        # Coin flip decides which network takes which colour.
        if np.random.uniform(0, 1) <= 0.5:
            white, black = self.current, self.best
            w, b = "current", "best"
        else:
            white, black = self.best, self.current
            w, b = "best", "current"

        # Fresh game state.
        current_board = Board()
        game_won = False
        dataset = []
        value = 0
        # Exploration vs exploitation factor (smaller -> more exploitation).
        temperature = 0.1

        while not game_won and current_board.is_playable():
            dataset.append(copy.deepcopy(current_board.encode()))
            # Ask the network whose turn it is for a move policy.
            player = current_board.player
            if player == PLAYER_1:
                root = UCT_search(current_board, num_reads, white)
                policy = get_policy(root, temperature)
                print("Policy: ", policy, "white = %s" % (str(w)))
            elif player == PLAYER_2:
                root = UCT_search(current_board, num_reads, black)
                policy = get_policy(root, temperature)
                print("Policy: ", policy, "black = %s" % (str(b)))
            else:
                raise AssertionError("Invalid player.")
            # Sample a column from the policy distribution.
            col_choice = np.random.choice(np.array([0, 1, 2, 3, 4, 5, 6]),
                                          p=policy)

            current_board.drop_piece(col_choice)  # apply the move
            print(current_board)
            if current_board.check_winner():  # someone wins
                if current_board.player == PLAYER_1:  # black wins
                    value = -1
                elif current_board.player == PLAYER_2:  # white wins
                    value = 1
                game_won = True

        # Record the terminal position (one-hot encoded) as well.
        dataset.append(current_board.encode())
        if value == -1:
            dataset.append(f"{b} as black wins")
            return b, dataset
        if value == 1:
            dataset.append(f"{w} as white wins")
            return w, dataset
        dataset.append("Nobody wins")
        return None, dataset
Example #2
0
def self_play(net: Connect4Network, start_index: int, cpu_index: int,
              num_games: int, args: AlphaZeroArgs, iteration: int):
    """
    Self Play of AlphaZero, generating and saving Datasets for the training
    of the Neural Network.

    One pickled dataset per game — a list of [board_state, policy, value]
    triples — is written to ./datasets/iter_<iteration>.

    Note: annotations use the builtin ``int``; the previous ``np.int`` alias
    was deprecated and removed in NumPy >= 1.24 and made the ``def`` itself
    raise AttributeError on modern NumPy.

    @param net: network guiding the MCTS search
    @param start_index: Start index of Self Play games
    @param cpu_index: index of the worker CPU (embedded in the file name)
    @param num_games: number of games this worker plays
    @param args: hyper-parameters (MCTS temperature and read count)
    @param iteration: current Iteration
    """

    # number of more random moves, before lowering temp
    n_max_moves = 11

    print(f"CPU={cpu_index}: Starting MCTS")
    iteration_dir = f"./datasets/iter_{iteration}"

    # exist_ok=True tolerates several worker processes racing to create it
    os.makedirs(iteration_dir, exist_ok=True)

    # Play self play games
    for game_idx in range(start_index, num_games + start_index):
        print(f"Game {game_idx}")

        current_board = Board()
        game_won = False  # indicates that a game is won

        dataset = []      # [encoded board, policy] pairs, one per move
        states = []       # unencoded board snapshots (kept for inspection)
        value = 0         # outcome: -1 black wins, 1 white wins, 0 draw
        move_count = 0

        while not game_won and current_board.is_playable():
            # less random further into the game
            t = args.temperature_mcts if move_count < n_max_moves else 0.1

            # save current board state (encoded and unencoded)
            states.append(current_board.current_board.copy())
            board_state = current_board.encode().copy()

            root = UCT_search(current_board, args.num_reads_mcts, net)

            policy = get_policy(root, t)
            print(f"Game {game_idx} policy: {policy}")

            col_choice = np.random.choice(np.array([0, 1, 2, 3, 4, 5, 6]),
                                          p=policy)

            current_board.drop_piece(col_choice)  # move piece

            dataset.append([board_state, policy])
            print(f"[Iteration: {iteration}]: Game {game_idx} CURRENT BOARD:\n",
                  current_board)

            move_count += 1
            if current_board.check_winner():  # if somebody won
                if current_board.player == PLAYER_1:  # black wins
                    print("Black wins")
                    value = -1
                elif current_board.player == PLAYER_2:  # white wins
                    print("White wins")
                    value = 1
                game_won = True

        # Attach the final game value to every stored position; the first
        # position gets 0 because no move had been made yet.
        dataset_p = []
        for move_idx, data in enumerate(dataset):
            s, p = data
            dataset_p.append([s, p, 0 if move_idx == 0 else value])

        # Save the dataset.
        # BUG FIX: the original reused the loop variable `idx` for the inner
        # enumerate above, so the file name carried the last move index
        # instead of the game index — games with the same number of moves
        # silently overwrote each other's files. `game_idx` is now used.
        time_string = datetime.datetime.today().strftime("%Y-%m-%d")
        pickle_file = (f"iter_{iteration}/dataset_iter{iteration}"
                       f"_cpu{cpu_index}_{game_idx}_{time_string}")
        util.pickle_save(pickle_file, dataset_p)