def test_move_is_hashable(self):
    """Moves can be used as dict keys and equal moves find the same entry."""
    table = {}
    table[Move.play(Point(1, 1))] = 1
    table[Move.resign()] = 2

    # Look up with freshly built moves, not the objects used as keys above,
    # so the test exercises __hash__/__eq__ rather than object identity.
    self.assertEqual(1, table[Move.play(Point(1, 1))])
    self.assertEqual(2, table[Move.resign()])
def read_move(self):
    """Decode a single move using this reader's primitive read methods.

    Three boolean flags are always read first, in the order play, pass,
    resign. For a play, two integers (row, then column) are read next.
    The flags are checked in that same order, so the first one that is
    set decides the result.

    Returns:
        ``Move.play(Point(row=..., col=...))``, ``Move.pass_turn()`` or
        ``Move.resign()``.

    Raises:
        AssertionError: if none of the three flags is set.
    """
    is_play = self.read_bool()
    is_pass = self.read_bool()
    is_resign = self.read_bool()

    if is_play:
        row = self.read_int()
        col = self.read_int()
        return Move.play(Point(row=row, col=col))
    if is_pass:
        return Move.pass_turn()
    if not is_resign:
        # Raised explicitly (rather than via `assert`) so the check is not
        # stripped under `python -O`; otherwise a record with no flag set
        # would silently decode as a resignation.
        raise AssertionError(
            'move record has none of the play/pass/resign flags set')
    return Move.resign()
def select_move(self, game_state):
    """Search from a fresh root and choose a move for ``game_state``.

    Steps, in order:
      1. Build the root, from ``read_ladders`` when ladder rollouts are
         enabled and it yields a node, otherwise from ``create_node``.
      2. Run batches of leaf selections (marking each traversed branch
         with virtual loss), expand each batch via ``create_children``
         and record the resulting values on the parents, until at least
         ``self._num_rollouts`` rollouts have been counted.
      3. Score each move as ``visit_count + 0.499 * (Q + 1)`` and either
         sample among visited moves (temperature > 0) or take the argmax.
      4. Possibly override the choice with a resignation or a pass.

    Progress and the decision are logged to ``sys.stderr``.

    Returns:
        The decoded chosen move; ``Move.resign()`` when its expected
        value is below ``self._resign_below``; or ``Move.pass_turn()``
        when the gracious-winner rule applies.
    """
    started_at = time.time()

    self.root = None
    if self._ladder_rollouts > 0:
        self.root = self.read_ladders(game_state, self._ladder_rollouts)
    if self.root is None:
        self.root = self.create_node(game_state, add_noise=True)

    rollouts_done = 0
    while rollouts_done < self._num_rollouts:
        pending = set()
        queued = 0
        while queued < self._batch_size:
            # Walk down from the root until we hit an unexpanded branch,
            # marking every branch we pass through with virtual loss.
            leaf = self.root
            while True:
                branch = self.select_branch(leaf)
                if not leaf.has_child(branch):
                    break
                leaf.add_virtual_loss(branch)
                leaf = leaf.get_child(branch)
            leaf.add_virtual_loss(branch)
            queued += 1
            pending.add((leaf, branch))

        # `pending` is a set, so duplicate (node, move) pairs in a batch
        # count as a single rollout.
        unique_in_batch = len(pending)
        for child in self.create_children(pending):
            child.parent.record_visit(child.move, child.value)
        rollouts_done += unique_in_batch

    # Rank moves by visit count; the expected value only breaks ties,
    # since 0.499 * (Q + 1) stays below 1 as long as Q <= 1
    # (presumably Q lies in [-1, 1] -- confirm in calc_expected_values).
    visit_counts = self.root.visit_counts
    expected_values = calc_expected_values(
        self.root.total_values, visit_counts)
    decide_vals = visit_counts + 0.499 * (expected_values + 1)

    for idx in np.argsort(decide_vals):
        n_visits = visit_counts[idx]
        if not n_visits > 0:
            continue
        sys.stderr.write('{}: {:.3f} {}\n'.format(
            format_move(self._encoder.decode_move_index(idx)),
            expected_values[idx],
            n_visits))

    temperature = self._temp_schedule.get(game_state.num_moves)
    if not temperature > 0:
        move_index = np.argmax(decide_vals)
    else:
        # Sample among visited moves, weighting by score ** (1 / T).
        (candidates,) = np.where(visit_counts > 0)
        weights = np.power(decide_vals[candidates], 1.0 / temperature)
        weights /= np.sum(weights)
        move_index = np.random.choice(candidates, p=weights)

    self._log_pv(self.root)

    chosen_move = self._encoder.decode_move_index(move_index)
    chosen_q = expected_values[move_index]
    sys.stderr.write('Select {} Q {:.3f}\n'.format(
        format_move(chosen_move), chosen_q))
    sys.stderr.write('Decided in {:.3f}s\n'.format(
        time.time() - started_at))
    sys.stderr.flush()

    if chosen_q < self._resign_below:
        sys.stderr.write('Resigning because Q {:.3f} < {:.3f}\n'.format(
            chosen_q, self._resign_below))
        return Move.resign()

    # Gracious winner: if the opponent just passed and passing ourselves
    # still scores above the configured threshold, pass as well.
    if self._gracious_winner is None:
        return chosen_move
    if game_state.last_move is None or \
            not (game_state.last_move == Move.pass_turn()):
        return chosen_move

    pass_idx = self._encoder.encode_move(Move.pass_turn())
    pass_is_safe = (
        visit_counts[pass_idx] >= 2 and
        expected_values[pass_idx] > self._gracious_winner)
    if pass_is_safe:
        sys.stderr.write('Pass has Q {:.3f}\n'.format(
            expected_values[pass_idx]))
        return Move.pass_turn()
    return chosen_move