示例#1
0
    def test_move_is_hashable(self):
        """Check that moves can be used as dictionary keys.

        Entries are stored under one set of ``Move`` objects and fetched
        with separately constructed ones, so the lookups only succeed when
        moves built from the same arguments hash and compare alike.
        """
        lookup = {}
        lookup[Move.play(Point(1, 1))] = 1
        lookup[Move.resign()] = 2

        # Build the probe keys from scratch rather than reusing stored ones.
        play_probe = Move.play(Point(1, 1))
        self.assertEqual(1, lookup[play_probe])
        resign_probe = Move.resign()
        self.assertEqual(2, lookup[resign_probe])
示例#2
0
 def read_move(self):
     """Read one encoded move from the underlying stream.

     Three boolean flags (play, pass, resign) are always consumed, in that
     order. When the play flag is set, two ints follow: the row and then
     the column of the point.

     Returns:
         ``Move.play(...)``, ``Move.pass_turn()`` or ``Move.resign()``,
         according to the first flag that is set.

     Raises:
         AssertionError: if none of the three flags is set.
     """
     is_play = self.read_bool()
     is_pass = self.read_bool()
     is_resign = self.read_bool()
     if is_play:
         row = self.read_int()
         col = self.read_int()
         return Move.play(Point(row=row, col=col))
     if is_pass:
         return Move.pass_turn()
     if not is_resign:
         # Raised explicitly rather than with an `assert` statement:
         # asserts are stripped under `python -O`, which would let a
         # record with no flag set be decoded as a resignation. The
         # exception type is kept so existing callers see no change.
         raise AssertionError(
             'encoded move has none of the play/pass/resign flags set')
     return Move.resign()
示例#3
0
文件: bot.py 项目: macfergus/badukai
    def select_move(self, game_state):
        """Search from ``game_state`` and return the move to play.

        A root node is taken from ``read_ladders`` when ladder rollouts are
        enabled and it yields one, otherwise from ``create_node``. Batches
        of leaves are then expanded for as long as the number of expanded
        (node, move) pairs is below ``self._num_rollouts``, and a move is
        picked from the root's visit counts. Diagnostics are written to
        stderr along the way.

        Returns:
            ``Move.resign()`` when the picked move's expected value is below
            ``self._resign_below``. Otherwise ``Move.pass_turn()`` when
            ``self._gracious_winner`` is set, the previous move was a pass,
            and passing has at least two visits and an expected value above
            that threshold. Otherwise the picked move.
        """
        started_at = time.time()

        # Reset before read_ladders runs, exactly as the search expects.
        self.root = None
        if self._ladder_rollouts > 0:
            self.root = self.read_ladders(game_state, self._ladder_rollouts)
        if self.root is None:
            self.root = self.create_node(game_state, add_noise=True)

        rollouts_done = 0
        while rollouts_done < self._num_rollouts:
            # (node, move) pairs awaiting expansion; being a set, repeated
            # selections of the same pair collapse into one entry.
            pending = set()
            picked = 0
            while picked < self._batch_size:
                # Walk down through existing children until the selected
                # branch has no child yet, adding a virtual loss to every
                # branch taken, including the final unexpanded one.
                leaf = self.root
                while True:
                    branch = self.select_branch(leaf)
                    if not leaf.has_child(branch):
                        break
                    leaf.add_virtual_loss(branch)
                    leaf = leaf.get_child(branch)
                leaf.add_virtual_loss(branch)
                pending.add((leaf, branch))
                picked += 1

            unique_visits = len(pending)
            for child in self.create_children(pending):
                child.parent.record_visit(child.move, child.value)
            rollouts_done += unique_visits

        # Choose a move in proportion to how often it was visited.
        root_visits = self.root.visit_counts
        q_values = calc_expected_values(self.root.total_values, root_visits)
        # The fractional term ranks moves with equal visit counts by their
        # expected value. Assuming values lie in [-1, 1] it stays below 1
        # and cannot outweigh a whole visit -- TODO confirm the range.
        scores = root_visits + 0.499 * (q_values + 1)

        # Report every visited move, lowest score first.
        for idx in np.argsort(scores):
            visits = root_visits[idx]
            if not visits > 0:
                continue
            sys.stderr.write('{}: {:.3f} {}\n'.format(
                format_move(self._encoder.decode_move_index(idx)),
                q_values[idx], visits))

        temperature = self._temp_schedule.get(game_state.num_moves)
        if temperature > 0:
            # Sample among visited moves, weighting by score ** (1 / T).
            (visited,) = np.where(root_visits > 0)
            weights = np.power(scores[visited], 1.0 / temperature)
            weights /= np.sum(weights)
            move_index = np.random.choice(visited, p=weights)
        else:
            move_index = np.argmax(scores)

        self._log_pv(self.root)

        chosen_move = self._encoder.decode_move_index(move_index)
        chosen_q = q_values[move_index]
        sys.stderr.write('Select {} Q {:.3f}\n'.format(
            format_move(chosen_move), chosen_q))
        elapsed = time.time() - started_at
        sys.stderr.write('Decided in {:.3f}s\n'.format(elapsed))
        sys.stderr.flush()

        if chosen_q < self._resign_below:
            sys.stderr.write('Resigning because Q {:.3f} < {:.3f}\n'.format(
                chosen_q, self._resign_below))
            return Move.resign()

        if self._gracious_winner is None:
            return chosen_move

        opponent_passed = (game_state.last_move is not None
                           and game_state.last_move == Move.pass_turn())
        if not opponent_passed:
            return chosen_move

        # The opponent just passed: pass back if passing looks good enough.
        pass_idx = self._encoder.encode_move(Move.pass_turn())
        if (root_visits[pass_idx] >= 2
                and q_values[pass_idx] > self._gracious_winner):
            sys.stderr.write('Pass has Q {:.3f}\n'.format(
                q_values[pass_idx]))
            return Move.pass_turn()
        return chosen_move