示例#1
0
    def test_feature4(self):
        """feature4 scoring for single and double own pieces on the bottom row."""
        # |==============|
        # |              |
        # |              |
        # |              |
        # |              |
        # |    O         |
        # |    X         |
        # |==============|
        # |0 1 2 3 4 5 6 |
        single_stack = Board()
        for column, player in ((2, PLAYER_1), (2, PLAYER_2)):
            single_stack.drop_piece(column, player)

        self.assertEqual(heuristic_1.feature4(single_stack, PLAYER_1), 120.0)

        # |==============|
        # |              |
        # |              |
        # |              |
        # |              |
        # |    O     X   |
        # |    X     O   |
        # |==============|
        # |0 1 2 3 4 5 6 |
        double_stack = Board()
        for column, player in ((2, PLAYER_1), (5, PLAYER_2),
                               (5, PLAYER_1), (2, PLAYER_2)):
            double_stack.drop_piece(column, player)

        self.assertEqual(heuristic_1.feature4(double_stack, PLAYER_1), 190.0)
示例#2
0
    def test_feature1(self):
        """feature1 returns infinity for a board that player 1 has already won."""
        # |==============|
        # |              |
        # |              |
        # |              |
        # |              |
        # |O O O         |
        # |X X X X       |
        # |==============|
        # |0 1 2 3 4 5 6 |
        board = Board()
        moves = ((0, PLAYER_1), (0, PLAYER_2),
                 (1, PLAYER_1), (1, PLAYER_2),
                 (2, PLAYER_1), (2, PLAYER_2),
                 (3, PLAYER_1))
        for column, player in moves:
            board.drop_piece(column, player)

        # four X in a row on the bottom: only player 1 has won
        self.assertTrue(board.check_winner(PLAYER_1))
        self.assertFalse(board.check_winner(PLAYER_2))

        self.assertEqual(heuristic_1.feature1(board, PLAYER_1), np.inf)
示例#3
0
    def test_feature3(self):
        """feature3 scoring for two-in-a-row with both vs. one open neighbour."""
        # |==============|
        # |              |
        # |              |
        # |              |
        # |              |
        # |  O O         |
        # |  X X         |
        # |==============|
        # |0 1 2 3 4 5 6 |
        both_sides_open = Board()
        for column, player in ((1, PLAYER_1), (1, PLAYER_2),
                               (2, PLAYER_1), (2, PLAYER_2)):
            both_sides_open.drop_piece(column, player)

        self.assertEqual(
            heuristic_1.feature3(both_sides_open, PLAYER_1), 50000.0)

        # |==============|
        # |              |
        # |              |
        # |              |
        # |              |
        # |  O           |
        # |O X X         |
        # |==============|
        # |0 1 2 3 4 5 6 |
        one_side_blocked = Board()
        for column, player in ((1, PLAYER_1), (1, PLAYER_2),
                               (2, PLAYER_1), (0, PLAYER_2)):
            one_side_blocked.drop_piece(column, player)

        self.assertEqual(
            heuristic_1.feature3(one_side_blocked, PLAYER_1), 30000.0)
示例#4
0
    def play_round(self,
                   num_reads: int) -> Tuple[Optional[str], List[np.ndarray]]:
        """
        Evaluate the trained network by playing matches between the current and the previous NN

        @param num_reads: number of MCTS reads per move (see args)
        @return: tuple of (name of the winning net - "current"/"best" - or
                 None on a draw, list of the encoded board states seen during
                 the game, followed by a string describing the result)
        """
        print("Starting game round...")
        # randomly choose starting player (which net plays white)
        if np.random.uniform(0, 1) <= 0.5:
            white = self.current
            black = self.best
            w = "current"
            b = "best"
        else:
            white = self.best
            black = self.current
            w = "best"
            b = "current"

        # initializing
        current_board = Board()
        game_won = False
        dataset = []
        value = 0
        temperature = 0.1  # exploration vs exploitation factor (smaller -> more exploitation)

        while not game_won and current_board.is_playable():
            dataset.append(copy.deepcopy(current_board.encode()))
            # get the policy from the net whose turn it is
            if current_board.player == PLAYER_1:
                root = UCT_search(current_board, num_reads, white)
                policy = get_policy(root, temperature)
                # f-string for consistency with the result messages below
                # (w is already a str; the former str(w) was redundant)
                print("Policy: ", policy, f"white = {w}")
            elif current_board.player == PLAYER_2:
                root = UCT_search(current_board, num_reads, black)
                policy = get_policy(root, temperature)
                print("Policy: ", policy, f"black = {b}")
            else:
                raise AssertionError("Invalid player.")
            # choose a column sampled from the policy distribution
            col_choice = np.random.choice(np.array([0, 1, 2, 3, 4, 5, 6]),
                                          p=policy)

            current_board.drop_piece(col_choice)  # move piece
            print(current_board)
            if current_board.check_winner():  # someone wins
                # board.player already names the side to move next,
                # so the opposite side is the one that just won
                if current_board.player == PLAYER_1:  # black wins
                    value = -1
                elif current_board.player == PLAYER_2:  # white wins
                    value = 1
                game_won = True
        # Append new board to the dataset encoded in one-hot-encoding manner
        dataset.append(current_board.encode())
        if value == -1:
            dataset.append(f"{b} as black wins")
            return b, dataset
        elif value == 1:
            dataset.append(f"{w} as white wins")
            return w, dataset
        else:
            dataset.append("Nobody wins")
            return None, dataset
示例#5
0
    def test_feature2(self):
        """feature2 scoring for three-in-a-row, contiguous and with a gap."""
        # |==============|
        # |              |
        # |              |
        # |              |
        # |              |
        # |  O O         |
        # |  X X X O     |
        # |==============|
        # |0 1 2 3 4 5 6 |
        contiguous_three = Board()
        for column, player in ((1, PLAYER_1), (1, PLAYER_2),
                               (2, PLAYER_1), (2, PLAYER_2),
                               (3, PLAYER_1), (4, PLAYER_2)):
            contiguous_three.drop_piece(column, player)

        self.assertEqual(
            heuristic_1.feature2(contiguous_three, PLAYER_1), 900000.0)

        # |==============|
        # |              |
        # |              |
        # |              |
        # |              |
        # |O   O         |
        # |X   X X O     |
        # |==============|
        # |0 1 2 3 4 5 6 |
        gapped_three = Board()
        for column, player in ((0, PLAYER_1), (0, PLAYER_2),
                               (2, PLAYER_1), (2, PLAYER_2),
                               (3, PLAYER_1), (4, PLAYER_2)):
            gapped_three.drop_piece(column, player)

        self.assertEqual(
            heuristic_1.feature2(gapped_three, PLAYER_1), 900000.0)
示例#6
0
def self_play(net: Connect4Network, start_index: int, cpu_index: int,
              num_games: int, args: AlphaZeroArgs, iteration: int):
    """
    Self Play of AlphaZero, generating and saving Datasets for the training of the Neural Network

    @param net: network guiding the MCTS tree search
    @param start_index: Start index of Self Play games
    @param cpu_index: index of the worker running this batch (used in filenames)
    @param num_games: number of games to play in this call
    @param args: AlphaZero hyper-parameters (num_reads_mcts, temperature_mcts)
    @param iteration: current Iteration

    Note: annotations changed from the removed ``np.int`` alias (gone since
    NumPy 1.24) to the builtin ``int``; callers already pass plain ints.
    """

    # number of more random moves, before lowering temp
    n_max_moves = 11

    print(f"CPU={cpu_index}: Starting MCTS")
    iteration_dir = f"./datasets/iter_{iteration}"

    # exist_ok avoids the isdir/makedirs race when several workers start at once
    os.makedirs(iteration_dir, exist_ok=True)

    # Play self play games
    for game_idx in range(start_index, num_games + start_index):
        print(f"Game {game_idx}")

        current_board = Board()
        game_won = False  # indicates that a game is won

        dataset = []
        states = []
        value = 0
        move_count = 0

        while not game_won and current_board.is_playable():
            # exploration vs exploitation: higher temperature (more random)
            # for the first n_max_moves, then drop to near-greedy play
            t = 0.1
            if move_count < n_max_moves:
                t = args.temperature_mcts

            # save current board state (encoded and unencoded)
            states.append(current_board.current_board.copy())
            board_state = current_board.encode().copy()

            root = UCT_search(current_board, args.num_reads_mcts, net)

            policy = get_policy(root, t)
            print(f"Game {game_idx} policy: {policy}")

            # sample a column from the MCTS policy distribution
            col_choice = np.random.choice(np.array([0, 1, 2, 3, 4, 5, 6]),
                                          p=policy)

            current_board.drop_piece(col_choice)  # move piece

            dataset.append([board_state, policy])
            print(f"[Iteration: {iteration}]: Game {game_idx} CURRENT BOARD:\n",
                  current_board)

            move_count += 1
            if current_board.check_winner():  # if somebody won
                # board.player already names the side to move next,
                # so the opposite side is the one that just won
                if current_board.player == PLAYER_1:  # black wins
                    value = -1
                elif current_board.player == PLAYER_2:  # white wins
                    value = 1
                game_won = True

        # Attach the final game value to every recorded (state, policy) pair;
        # the very first position deliberately keeps value 0.
        dataset_p = []
        for move_idx, (s, p) in enumerate(dataset):
            dataset_p.append([s, p, 0 if move_idx == 0 else value])

        # Save the dataset. BUGFIX: the filename previously used the inner
        # enumerate index (which shadowed the outer `idx`), so files were
        # named by the last move number and games could overwrite each other;
        # it now uses the game index.
        time_string = datetime.datetime.today().strftime("%Y-%m-%d")
        pickle_file = (f"iter_{iteration}/dataset_iter{iteration}"
                       f"_cpu{cpu_index}_{game_idx}_{time_string}")
        util.pickle_save(pickle_file, dataset_p)