Example #1
0
File: mcts.py  Project: TaWeiTu/InforGo
 def _playout(self, state):
     """Run one MCTS playout.

     Descend from the root for at most ``_playout_depth`` selection steps,
     expanding any leaf reached along the way, then back-propagate a value
     blended from the TD(0) estimate and a rollout (weighted by lambda).
     """
     cur = self._root
     for _ in range(self._playout_depth):
         if cur.is_leaf():
             if state.terminate():
                 break
             legal = [a for a in range(16)
                      if state.valid_action(*decode_action(a))]
             # Prior probability of each action is its height priority,
             # normalized over all legal actions.
             total = sum(self._get_priority(state.get_height(*decode_action(a)))
                         for a in legal)
             priors = [(a,
                        self._get_priority(state.get_height(*decode_action(a)))
                        / total)
                       for a in legal]
             cur._expand(priors)
         chosen = cur._select(self._c)
         cur = cur._children[chosen]
         state.take_action(*decode_action(chosen))
     # v: TD(0) network estimate; z: rollout outcome (eligibility trace).
     v = self._evaluate([state.get_state()],
                        [self._player]) if self._lamda < 1 else 0
     z = self._rollout(state) if self._lamda > 0 else 0
     cur._back_prop((1 - self._lamda) * v + self._lamda * z, self._c)
Example #2
0
 def generate_action(self, state):
     """Return (row, col): take a winning move if one exists, otherwise
     block the opponent's winning move, otherwise play a random legal move."""
     winning = get_winning_move(state, self._player)
     if winning:
         return winning[0][1], winning[0][2]
     blocking = get_winning_move(state, self._opponent)
     if blocking:
         return blocking[0][1], blocking[0][2]
     env = State(state)
     legal = [a for a in range(16) if env.valid_action(*decode_action(a))]
     pick = decode_action(random.choice(legal))
     return pick[0], pick[1]
Example #3
0
    def _search(self,
                c_state,
                player,
                depth,
                max_level=True,
                alpha=-np.inf,
                beta=np.inf):
        """Minimax search with alpha-beta pruning over every legal action.

        Arguments:
        c_state -- a copy of the current state
        player -- the player to move at this node
        depth -- remaining depth for further searching
        max_level -- True if this level is the maximizer
        alpha -- best value along the path from root to the maximizer
        beta -- worst value along the path from root to the minimizer

        Returns:
        return_val -- value of this node
        return_action -- the action return_val comes from
        """
        # Terminal positions are scored exactly: win +1, loss -1, draw 0.
        if c_state.terminate():
            if c_state.win(player):
                return 1, None
            if c_state.win(-player):
                return -1, None
            return 0, None
        # Depth exhausted: fall back to the learned evaluator.
        if depth == 0:
            return self._evaluate([c_state.get_state()], [player]), None
        better = max if max_level else min
        best_val = -np.inf if max_level else np.inf
        best_act = -1
        legal = [a for a in range(16)
                 if c_state.valid_action(*decode_action(a))]
        # Shuffle so ties between equally-valued moves are broken randomly.
        random.shuffle(legal)
        for act in legal:
            child = State(c_state)
            child.take_action(*decode_action(act))
            val, _ = self._search(child, -player, depth - 1,
                                  not max_level, alpha, beta)
            best_val = better(best_val, val)
            if val == best_val:
                best_act = act
            alpha, beta, prune = self._alpha_beta_pruning(
                best_val, max_level, alpha, beta)
            if prune:
                break
        return best_val, best_act
Example #4
0
File: ai.py  Project: TaWeiTu/InforGo
    def get_action(self, state):
        """Get the action InforGo chooses for the current state.

        Arguments:
        state -- current state

        Returns:
        (row, col) denoting the chosen action
        """
        # InforGo is player 1 when it moves first, -1 otherwise.
        who = 1 if self._play_first else -1
        return decode_action(self._tree.get_action(state, who))
Example #5
0
File: mcts.py  Project: TaWeiTu/InforGo
 def _rollout(self, state):
     """Simulate the game to completion with the rollout policy.

     Returns 1 if self._player wins, -1 on a loss, and 0 on a draw or when
     the simulation exceeds self._rollout_limit steps.
     """
     mover = state.player
     steps = 0
     while not state.terminate():
         # Cap simulation length; an over-long rollout counts as a draw.
         if steps > self._rollout_limit:
             return 0
         chosen = self._rollout_policy(state, mover)
         state.take_action(*decode_action(chosen))
         mover = -mover
         steps += 1
     if state.win(self._player):
         return 1
     return -1 if state.win(-self._player) else 0
Example #6
0
File: mcts.py  Project: TaWeiTu/InforGo
    def _rollout_policy(self, state, player):
        """Randomized rollout policy.

        Takes an immediate winning move when available, otherwise blocks the
        opponent's winning move, otherwise plays a uniformly random legal move.

        Arguments:
        state -- current state
        player -- current player

        Returns:
        encoded action chosen by the rollout policy
        """
        # Check our own winning moves first, then the opponent's (to block).
        for side in (player, -player):
            move = get_winning_move(state, side)
            if move:
                return encode_action((move[0][1], move[0][2]))
        legal = [a for a in range(16)
                 if state.valid_action(*decode_action(a))]
        return random.choice(legal)