Example #1
    def expand(self, leaf_state: TichuState) -> None:
        """Add a child node and edge for each action playable from the leaf_state."""
        leaf_nid = self._graph_node_id(leaf_state)
        for action in leaf_state.possible_actions_gen():
            to_nid = self._graph_node_id(state=leaf_state.next_state(action))
            self.add_child_node(from_nid=leaf_nid,
                                to_nid=to_nid,
                                action=action)
Example #2
    def _tree_policy(self, history: StateActionHistory,
                     state: TichuState) -> TichuAction:
        """
        
        :param history: 
        :param state: Any Game-state in the game_graph, but may be a leaf
        :return: The selected action
        """

        self._visited.add(state)

        # find max (return uniformly at random from max uct)
        poss_actions = set(state.possible_actions())
        max_val = -float('inf')
        max_actions = list()
        for _, to_infoset, action in self.graph.out_edges_iter(nbunch=[state],
                                                               data='action',
                                                               default=None):
            if action in poss_actions:
                child_n = self.graph.node[to_infoset]
                self._possible.add(to_infoset)
                val = child_n['record'].uct(p=to_infoset.player_id)
                if max_val == val:
                    max_actions.append(action)
                elif max_val < val:
                    max_val = val
                    max_actions = [action]

        ret = random.choice(max_actions)
        # logging.debug(f"tree policy -> {ret}")
        return ret
Example #3
    def search(self,
               start_infoset: TichuState,
               iterations: int,
               cheat: bool = False) -> TichuAction:
        logging.debug(
            f"Starting Icarus search for {iterations} iterations; cheating: {cheat}"
        )
        # initialisation
        base_history = self.search_init(start_infoset)

        for iteration in range(iterations):
            # playout
            history = base_history.copy()
            root_state = start_infoset.determinization(
                observer_id=start_infoset.player_id, cheat=cheat)
            state = root_state
            while not state.is_terminal():
                action = self.policy(history=history, state=state)
                history.append(state=state, action=action)
                next_state = state.next_state(action, infoset=True)
                state = next_state

            # state is now terminal
            history.append(state=state, action=None)
            reward_vector = state.reward_vector()

            # backpropagation
            for record, capture_context in self.capture(history, root_state):
                self.backpropagation(record, capture_context, reward_vector)

        return self.best_action(start_infoset)
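
The `StateActionHistory` type used by this loop is not shown in these examples. Below is a minimal sketch of the interface the loop relies on, assuming the history is simply an append-only list of (state, action) pairs; the real class may carry more bookkeeping.

from typing import List, Optional, Tuple


class StateActionHistory:
    """Minimal sketch only; not the project's actual implementation."""

    def __init__(self) -> None:
        self._pairs: List[Tuple['TichuState', Optional['TichuAction']]] = []

    def append(self, state: 'TichuState',
               action: Optional['TichuAction']) -> None:
        # a terminal state is appended with action=None
        self._pairs.append((state, action))

    def copy(self) -> 'StateActionHistory':
        copied = StateActionHistory()
        copied._pairs = list(self._pairs)
        return copied

    @property
    def last_state(self) -> 'TichuState':
        return self._pairs[-1][0]

    @property
    def last_action(self) -> Optional['TichuAction']:
        return self._pairs[-1][1]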
Example #4
    def best_action(self, state: TichuState) -> TichuAction:
        """

        :param state: 
        :return: The best action to play from the given state
        """
        nid = self._graph_node_id(state)

        assert nid in self.graph
        assert self.graph.out_degree(nid) > 0

        possactions = state.possible_actions()

        max_a = next(iter(possactions))
        max_v = -float('inf')
        for _, to_nid, action in self.graph.out_edges_iter(nid,
                                                           data='action',
                                                           default=None):
            if action in possactions:
                rec = self.graph.node[to_nid]['record']
                val = rec.ucb[state.player_id]
                logging.debug(f"   {val}->{action}: {rec}")
                if val > max_v:
                    max_v = val
                    max_a = action

        return max_a
Example #5
    def tree_selection(self, state: TichuState) -> TichuAction:
        """
        
        :param state:
        :return: 
        """
        # logging.debug("Tree selection")
        nid = self._graph_node_id(state)
        # store record for backpropagation
        rec = self.graph.node[nid]['record']
        self._visited_records.add(rec)

        # find max (return uniformly at random from max UCB1 value)
        poss_actions = set(state.possible_actions())
        max_val = -float('inf')
        max_actions = list()
        for _, to_nid, action in self.graph.out_edges_iter(nbunch=[nid],
                                                           data='action',
                                                           default=None):
            # logging.debug("Tree selection looking at "+str(action))
            if action in poss_actions:
                child_record = self.graph.node[to_nid]['record']
                self._available_records.add(child_record)
                val = child_record.ucb(p=state.player_id)
                if max_val == val:
                    max_actions.append(action)
                elif max_val < val:
                    max_val = val
                    max_actions = [action]

        next_action = random.choice(max_actions)
        # logging.debug(f"Tree selection -> {next_action}")
        return next_action
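
The `record.ucb` lookup above presumably computes the standard UCB1 value. A minimal sketch under that assumption follows; the exploration constant `c` and the per-player bookkeeping of the real record class may differ.

import math


def ucb1(total_reward: float, visits: int, parent_visits: int,
         c: float = math.sqrt(2)) -> float:
    """Standard UCB1; a sketch only, not the project's record class."""
    # unvisited children get an infinite value so each action is tried once
    if visits == 0:
        return float('inf')
    exploitation = total_reward / visits
    exploration = c * math.sqrt(math.log(parent_visits) / visits)
    return exploitation + exploration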
Example #6
    def _start_search(self, start_state: TichuState) -> TichuAction:
        logging.debug(
            f"agent {self.name} (pos {self._position}) starts search.")
        start_t = time.time()
        if len(start_state.possible_actions()) == 1:
            logging.debug(
                f"agent {self.name} (pos {self._position}): there is only one action to play."
            )
            action = next(iter(start_state.possible_actions()))
        else:
            action = self.search(start_state)

        logging.debug(
            f"agent {self.name} (pos {self._position}) found action: {action} (time: {time.time()-start_t})"
        )
        return action
Example #7
    def _expand_tree(self, leaf_state: TichuState) -> None:
        """
        Expand all possible actions from the leaf_state
        
        :param history: The StateActionHistory up to the leaf_state. leaf_state not included. Following should hold: history.last_state.next_state(history.last_action) == leaf_state
        :param leaf_state: 
        :return: None
        """

        # logging.debug('expanding tree')
        leaf_infostate = TichuState.from_tichustate(leaf_state)

        for action in leaf_state.possible_actions_gen():
            to_infoset = TichuState.from_tichustate(
                leaf_state.next_state(action))
            self._add_new_node_if_not_yet_added(infoset=to_infoset)
            self._add_new_edge(from_infoset=leaf_infostate,
                               to_infoset=to_infoset,
                               action=action)
Example #8
    def evaluate_state(self, state: TichuState) -> RewardVector:
        """
        Evaluate a terminal state by the point difference between the two teams.

        :param state: The (terminal) state to evaluate
        :return: The reward vector for the 4 players; e.g. for points
                 (60, 40, 60, 40) the result is (20, -20, 20, -20)
        """
        points = state.count_points()
        # both players of a team count the same (team) points
        assert points[0] == points[2] and points[1] == points[3]
        # reward is the difference to the enemy team
        r0 = points[0] - points[1]
        r1 = -r0
        return (r0, r1, r0, r1)
Example #9
    def _must_expand(self, state: TichuState) -> bool:
        if self._expanded:
            return False
        poss_acs = set(state.possible_actions())
        existing_actions = {
            action
            for _, _, action in self.graph.out_edges_iter(
                nbunch=[state], data='action', default=None)
        }
        if len(existing_actions) < len(poss_acs):
            return True

        # if all possible actions already exist -> must not expand
        return not poss_acs.issubset(existing_actions)
Example #10
    def is_fully_expanded(self, state: TichuState) -> bool:
        poss_acs = set(state.possible_actions())
        existing_actions = {
            action
            for _, _, action in self.graph.out_edges_iter(
                nbunch=[self._graph_node_id(state)],
                data='action',
                default=None)
        }
        if len(existing_actions) < len(poss_acs):
            return False

        # if all possible actions already exist -> is fully expanded
        return poss_acs.issubset(existing_actions)
Example #11
    def _create_tichu_state(self, round_history, wish: Optional[CardValue],
                            trick_on_table: Trick) -> TichuState:
        return TichuState(
            player_id=self.position,
            hand_cards=round_history.last_handcards,
            won_tricks=round_history.won_tricks,
            trick_on_table=trick_on_table,
            wish=wish,
            ranking=tuple(round_history.ranking),
            announced_tichu=frozenset(round_history.announced_tichus),
            announced_grand_tichu=frozenset(
                round_history.announced_grand_tichus),
            history=tuple(
                a for a in round_history.events
                if isinstance(a, (SimpleWinTrickEvent, CombinationAction,
                                  PassAction))
            ))
Example #12
    def search(self,
               root_state: TichuState,
               observer_id: int,
               iterations: int,
               cheat: bool = False,
               clear_graph_on_new_root: bool = True) -> TichuAction:
        logging.debug(
            f"started {self.__class__.__name__} with observer {observer_id}, for {iterations} iterations and cheat={cheat}"
        )
        check_param(observer_id in range(4))
        self.observer_id = observer_id
        root_nid = self._graph_node_id(root_state)

        if root_nid not in self.graph and clear_graph_on_new_root:
            self.graph.clear()
        else:
            logging.debug("Could keep the graph :)")
        self.add_root(root_state)

        for iteration in range(iterations):
            self._init_iteration()
            # logging.debug("iteration "+str(iteration))
            state = root_state.determinization(observer_id=self.observer_id,
                                               cheat=cheat)
            # logging.debug("Tree policy")
            leaf_state = self.tree_policy(state)
            # logging.debug("rollout")
            rollout_result = self.rollout_policy(leaf_state)
            # logging.debug("backpropagation")
            assert len(rollout_result) == 4
            self.backpropagation(reward_vector=rollout_result)

        action = self.best_action(root_state)
        logging.debug(f"size of graph after search: {len(self.graph)}")
        # self._draw_graph('./graphs/graph_{}.pdf'.format(time()))
        return action
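
A hypothetical call site for this method; `mcts` and `root` are illustrative names, not part of the source:

# hypothetical usage; `mcts` is an instance of the class above and
# `root` a TichuState for the player about to move
best = mcts.search(root_state=root,
                   observer_id=root.player_id,
                   iterations=1000,
                   cheat=False)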
Example #13
    def _rollout_policy(self, history: StateActionHistory,
                        state: TichuState) -> TichuAction:
        """Play a uniformly random action during the rollout phase."""
        ret = state.random_action()
        # logging.debug(f"rollout policy -> {ret}")
        return ret
Example #14
    def _graph_node_id(self, state: TichuState) -> NodeID:
        return state.position_in_episode()
Example #15
    def evaluate_state(self, state: TichuState) -> RewardVector:
        points = state.count_points()
        # both players of a team count the same (team) points
        assert points[0] == points[2] and points[1] == points[3]
        return points
Example #16
    def _graph_node_id(self, state: TichuState) -> NodeID:
        return state.unique_infoset_id(self.observer_id)