Exemplo n.º 1
0
    def test_simulation_can_recover_from_sucide_move_white(self):
        """MCTS should end up passing even when the raw policy prefers suicide.

        The board is built by alternating a pass (the move ``(0, SIZE)``) with
        a stone on every intersection except one randomly chosen point
        ``(x, y)``, where a second pass is played instead. The policy entries
        for that point and for the pass move are then swapped, and the test
        asserts that the masked policy's best entry is that point while
        ``select_play`` still returns the pass index ``SIZE * SIZE``.
        """
        net = self.model
        board, _ = game_init()

        # The single intersection that is left empty.
        x = randrange(SIZE)
        y = randrange(SIZE)
        empty_point = (x, y)
        pass_move = (0, SIZE)

        for col in range(SIZE):
            for row in range(SIZE):
                make_play(*pass_move, board)  # Black does not play
                # White passes on the chosen intersection, plays everywhere else
                white_move = pass_move if (col, row) == empty_point else (col, row)
                make_play(*white_move, board)
        make_play(*pass_move, board)  # Black does not play

        suicide_index = y * SIZE + x
        pass_index = SIZE * SIZE

        policies, values = net.predict_on_batch(board)
        policy = policies[0]
        # Swap the two probabilities to make the best move the suicide
        policy[suicide_index], policy[pass_index] = (
            policy[pass_index],
            policy[suicide_index],
        )
        policy = ma.masked_array(policy, mask=legal_moves(board))
        # Best option in the masked policy is the suicide
        self.assertEqual(np.argmax(policy), suicide_index)

        chosen_play = select_play(
            policy,
            board,
            mcts_simulations=128,
            mcts_tree=new_tree(policy, board),
            temperature=0,
            model=net,
        )

        # First simulation chooses pass, second simulation chooses suicide
        # (p is still higher), then going deeper it chooses pass again
        # (value is higher).
        self.assertEqual(chosen_play, pass_index)  # Pass move is best option
Exemplo n.º 2
0
    def genmove(self, color):
        """Choose a move for ``color``, play it, and return it with training data.

        The model is evaluated on the current board to get a policy and a
        value. When ``self.resign`` is truthy and the value is at or below it,
        no move is played and the coordinates ``(0, SIZE + 1)`` are returned
        (presumably the resign encoding -- confirm against the caller)
        together with the raw network policy.

        Otherwise ``select_play`` picks a move index using ``self.mcts_tree``
        (created with ``new_tree`` when it is missing or has an empty
        ``'subtree'``), the move is played through ``self.play``, and the
        returned policy is a vector of length ``SIZE * SIZE + 1`` filled with
        the ``'p'`` entry of every node in the tree's ``'subtree'``.

        Returns:
            The tuple ``(x, y, policy, value, board, player)``.
        """
        policies, values = self.model.predict_on_batch(self.board)
        policy, value = policies[0], values[0]

        # Resignation guard: only active when a threshold is configured.
        if self.resign and value <= self.resign:
            return 0, SIZE + 1, policy, value, self.board, self.player

        # Start a fresh search tree when there is none or it has no children.
        if not (self.mcts_tree and self.mcts_tree['subtree']):
            self.mcts_tree = new_tree(policy, self.board)

        index = select_play(
            policy,
            self.board,
            mcts_simulations=self.mcts_simulations,
            mcts_tree=self.mcts_tree,
            temperature=self.temperature,
            model=self.model,
        )
        logger.info("Generated index %s", index)
        x, y = index2coord(index)
        # Debugging aid: show_tree(x, y, self.mcts_tree)

        # Build the policy target from the 'p' value stored on each child node.
        policy_target = np.zeros(SIZE * SIZE + 1)
        children = self.mcts_tree['subtree']
        for move_index, node in children.items():
            policy_target[move_index] = node['p']

        self.board, self.player = self.play(color, x, y)
        return x, y, policy_target, value, self.board, self.player