def search(self, start_infoset: TichuState, iterations: int, cheat: bool = False) -> TichuAction:
    """Run the Icarus search loop and return the selected action.

    Each iteration copies the history produced by ``self.search_init``,
    determinizes ``start_infoset`` from the point of view of its own
    ``player_id``, plays the game out to a terminal state with
    ``self.policy`` and then backpropagates the terminal reward vector
    over every ``(record, capture_context)`` pair yielded by
    ``self.capture``.

    Args:
        start_infoset: State (information set) the search starts from.
        iterations: Number of playouts to run.
        cheat: Forwarded unchanged to ``determinization``.

    Returns:
        Whatever ``self.best_action(start_infoset)`` returns once all
        playouts are done.
    """
    logging.debug(
        f"Starting Icarus search for {iterations} iterations; cheating: {cheat}"
    )

    # Initialisation: every playout starts from its own copy of this history.
    initial_history = self.search_init(start_infoset)

    for _ in range(iterations):
        # Playout from a freshly determinized root.
        history = initial_history.copy()
        root_state = start_infoset.determinization(
            observer_id=start_infoset.player_id,
            cheat=cheat,
        )

        state = root_state
        while not state.is_terminal():
            chosen = self.policy(history=history, state=state)
            history.append(state=state, action=chosen)
            state = state.next_state(chosen, infoset=True)

        # The terminal state is recorded too, with no action attached.
        history.append(state=state, action=None)
        rewards = state.reward_vector()

        # Backpropagation over everything captured from this playout.
        for record, context in self.capture(history, root_state):
            self.backpropagation(record, context, rewards)

    return self.best_action(start_infoset)
def search(self, root_state: TichuState, observer_id: int, iterations: int, cheat: bool = False, clear_graph_on_new_root=True) -> TichuAction:
    """Run the tree search from ``root_state`` and return the selected action.

    Every iteration determinizes ``root_state`` for the observer, descends
    with ``self.tree_policy``, evaluates the reached state with
    ``self.rollout_policy`` and backpropagates the resulting reward vector.

    Args:
        root_state: State the search is rooted at.
        observer_id: Observing player; validated to lie in ``range(4)`` and
            stored on ``self.observer_id``.
        iterations: Number of search iterations to run.
        cheat: Forwarded unchanged to ``determinization``.
        clear_graph_on_new_root: When true, ``self.graph`` is cleared if the
            root's node id is not already contained in it.

    Returns:
        Whatever ``self.best_action(root_state)`` returns after the final
        iteration.
    """
    logging.debug(
        f"started {self.__class__.__name__} with observer {observer_id}, for {iterations} iterations and cheat={cheat}"
    )
    check_param(observer_id in range(4))
    self.observer_id = observer_id

    # Reuse the existing graph when the root is already part of it (or when
    # clearing is disabled); otherwise start from an empty graph.
    root_key = self._graph_node_id(root_state)
    if root_key in self.graph or not clear_graph_on_new_root:
        logging.debug("Could keep the graph :)")
    else:
        self.graph.clear()
    self.add_root(root_state)

    completed = 0
    while completed < iterations:
        completed += 1
        self._init_iteration()

        # Selection / expansion on a fresh determinization of the root.
        determinized = root_state.determinization(
            observer_id=self.observer_id,
            cheat=cheat,
        )
        leaf = self.tree_policy(determinized)

        # Rollout: exactly four reward entries are expected back.
        rewards = self.rollout_policy(leaf)
        assert len(rewards) == 4

        # Backpropagation.
        self.backpropagation(reward_vector=rewards)

    chosen = self.best_action(root_state)
    logging.debug(f"size of graph after search: {len(self.graph)}")
    return chosen