Пример #1
0
def remove_dead_stones_and_score(game_state):
    """Produce the final result for a finished game.

    If the last move was a resignation, a ``GameResult`` with zero scores
    is returned immediately, flagged for whichever side resigned. Otherwise
    the trailing passes are rewound, ``remove_dead_stones`` is run on the
    resulting position, and the cleaned board is scored after two fresh
    passes.

    Args:
        game_state: the state of the game once it has ended.

    Returns:
        The ``GameResult`` for the resignation case, or whatever
        ``compute_game_result`` returns for the cleaned-up position.
    """
    if game_state.last_move.is_resign:
        # The player who is next to move is the one who did NOT resign.
        if game_state.next_player == Player.black:
            black_resigned, white_resigned = False, True
        else:
            black_resigned, white_resigned = True, False
        return GameResult(
            black_resigned, white_resigned,
            0.0, 0.0,
            game_state.komi(), game_state.board)

    # Walk back to the most recent state whose last move was not a pass.
    settled = game_state
    while settled.last_move == Move.pass_turn():
        settled = settled.previous_state

    cleaned_board = remove_dead_stones(settled)
    rebuilt = GameState.from_board(
        cleaned_board,
        game_state.next_player,
        game_state.komi())
    # Close the rebuilt game with two consecutive passes before scoring.
    for _ in range(2):
        rebuilt = rebuilt.apply_move(Move.pass_turn())
    return compute_game_result(rebuilt)
Пример #2
0
 def decode_move_index(self, index):
     """Convert a flat move index back into a ``Move``.

     The index equal to ``self._pass_idx`` decodes to a pass. Any other
     index is split into a zero-based (row, column) pair using
     ``self._board_size`` as the row length, then shifted to the
     one-based coordinates passed to ``Point``.
     """
     if index == self._pass_idx:
         return Move.pass_turn()
     zero_row, zero_col = divmod(index, self._board_size)
     return Move.play(Point(row=zero_row + 1, col=zero_col + 1))
Пример #3
0
def decode_sgf_move(sgf_move, num_rows):
    """Translate an SGF move string into a ``Move``.

    Args:
        sgf_move: either the empty string (decoded as a pass) or a
            two-character coordinate whose first character selects the
            column and whose second selects the row, both looked up in
            ``ALPHABET``.
        num_rows: number of rows on the board; the row letter is counted
            down from this value, so the first letter of ``ALPHABET``
            maps to row ``num_rows``.

    Returns:
        ``Move.pass_turn()`` for the empty string, otherwise a play on the
        decoded point.
    """
    if sgf_move == '':
        return Move.pass_turn()
    assert len(sgf_move) == 2
    column = 1 + ALPHABET.index(sgf_move[0])
    letter_offset = ALPHABET.index(sgf_move[1])
    # SGF rows count in the opposite direction to Point rows, so flip.
    row = num_rows - letter_offset
    return Move.play(Point(row, column))
Пример #4
0
 def read_move(self):
     """Read one serialized move and rebuild it as a ``Move``.

     Three booleans (play, pass, resign) are always read first, in that
     order. Only when the play flag is set are two integers (row, then
     column) read afterwards.
     """
     play_flag = self.read_bool()
     pass_flag = self.read_bool()
     resign_flag = self.read_bool()

     if not play_flag:
         if pass_flag:
             return Move.pass_turn()
         # Neither play nor pass: the record must be a resignation.
         assert resign_flag
         return Move.resign()

     row = self.read_int()
     col = self.read_int()
     return Move.play(Point(row=row, col=col))
Пример #5
0
    def select_move(self, game_state):
        """Run a batched tree search from ``game_state`` and pick a move.

        The search tree is rebuilt from scratch on every call. Rollouts are
        performed in batches of ``self._batch_size`` leaf selections until at
        least ``self._num_rollouts`` distinct expansions have been made. The
        final move is chosen from the root's visit counts (sampled or
        greedy, depending on the temperature schedule). Diagnostics are
        written to ``sys.stderr`` along the way.

        Args:
            game_state: the position to choose a move for.

        Returns:
            ``Move.resign()`` if the chosen move's expected value is below
            ``self._resign_below``; ``Move.pass_turn()`` if the gracious
            winner rule applies; otherwise the decoded chosen move.
        """
        start = time.time()
        # Start with no root; optionally let the ladder reader supply one.
        self.root = None
        if self._ladder_rollouts > 0:
            self.root = self.read_ladders(game_state, self._ladder_rollouts)
        # Fall back to a fresh root node (created with add_noise=True) when
        # ladder reading is disabled or returned None.
        if self.root is None:
            self.root = self.create_node(game_state, add_noise=True)

        num_rollouts = 0
        while num_rollouts < self._num_rollouts:
            # (node, move) pairs to expand in this batch. A set is used, so
            # identical pairs selected more than once are expanded only once.
            to_expand = set()
            batch_count = 0
            while batch_count < self._batch_size:
                # Find a leaf: descend from the root along selected branches
                # for as long as the selected move already has a child,
                # adding a virtual loss on every edge that is traversed.
                node = self.root
                move = self.select_branch(node)
                while node.has_child(move):
                    node.add_virtual_loss(move)
                    node = node.get_child(move)
                    move = self.select_branch(node)
                # Also mark the unexpanded edge that is about to be expanded.
                node.add_virtual_loss(move)
                batch_count += 1
                to_expand.add((node, move))

            # Count distinct expansions, not selection attempts, as rollouts.
            batch_num_visits = len(to_expand)
            new_children = self.create_children(to_expand)
            # Record each new child's value on the edge leading to it.
            for new_child in new_children:
                new_child.parent.record_visit(new_child.move, new_child.value)
            num_rollouts += batch_num_visits

        # Now select a move in proportion to how often we visited it.
        visit_counts = self.root.visit_counts
        expected_values = calc_expected_values(self.root.total_values,
                                               visit_counts)
        # Add a value-based tiebreak on top of the visit counts.
        # NOTE(review): presumably expected values lie in [-1, 1], which
        # would keep the tiebreak below 1 so it never outweighs a whole
        # visit -- confirm against calc_expected_values.
        tiebreak = 0.499 * (expected_values + 1)
        decide_vals = visit_counts + tiebreak
        # Log every visited move, in ascending order of its decision value.
        for move_idx in np.argsort(decide_vals):
            visit_count = visit_counts[move_idx]
            if visit_count > 0:
                sys.stderr.write('{}: {:.3f} {}\n'.format(
                    format_move(self._encoder.decode_move_index(move_idx)),
                    expected_values[move_idx], visit_count))
        temperature = self._temp_schedule.get(game_state.num_moves)
        if temperature > 0:
            # Sample among visited moves with probability proportional to
            # decide_vals ** (1 / temperature).
            move_indices, = np.where(visit_counts > 0)
            raw_counts = decide_vals[move_indices]
            p = np.power(raw_counts, 1.0 / temperature)
            p /= np.sum(p)
            move_index = np.random.choice(move_indices, p=p)
        else:
            # Non-positive temperature: take the highest decision value.
            move_index = np.argmax(decide_vals)

        self._log_pv(self.root)

        chosen_move = self._encoder.decode_move_index(move_index)
        sys.stderr.write('Select {} Q {:.3f}\n'.format(
            format_move(chosen_move), expected_values[move_index]))
        end = time.time()
        sys.stderr.write('Decided in {:.3f}s\n'.format(end - start))
        sys.stderr.flush()
        # Resign when the chosen move's expected value is below the
        # configured threshold. This check runs before the pass rule below.
        if expected_values[move_index] < self._resign_below:
            sys.stderr.write('Resigning because Q {:.3f} < {:.3f}\n'.format(
                expected_values[move_index], self._resign_below))
            return Move.resign()

        # Gracious winner rule (only when a threshold is configured): if the
        # last move was a pass, and passing has at least two visits and an
        # expected value above the threshold, answer with a pass.
        if self._gracious_winner is not None:
            if game_state.last_move is not None and game_state.last_move == Move.pass_turn(
            ):
                pass_idx = self._encoder.encode_move(Move.pass_turn())
                if visit_counts[pass_idx] >= 2 and \
                        expected_values[pass_idx] > self._gracious_winner:
                    sys.stderr.write('Pass has Q {:.3f}\n'.format(
                        expected_values[pass_idx]))
                    return Move.pass_turn()
        return chosen_move