Exemplo n.º 1
0
 def default_policy(self, node: MPMCTSTreeNode, env: Env):
     """Play ``env`` to the end with uniformly random moves and return the payoffs.

     The first move is drawn from ``node.legal_actions`` for the player the
     environment reports as current; every later move is drawn from the
     ``'legal_actions'`` entry of the state returned by ``env.step``.

     Args:
         node: Tree node whose ``legal_actions`` is indexed by player id.
         env: Environment to advance; it is stepped in place.

     Returns:
         Whatever ``env.get_payoffs()`` returns once ``env.is_over()`` is true.
     """
     # Nothing to simulate when the game has already finished.
     if env.is_over():
         return env.get_payoffs()

     opening_moves = node.legal_actions[env.get_player_id()]
     move = random.sample(opening_moves, 1)[0]

     while not env.is_over():
         # NOTE(review): the meaning of the second argument (False) is not
         # visible from here - confirm against Env.step.
         observation, _ = env.step(move, False)
         if env.is_over():
             # Terminal state reached; the loop condition ends the rollout.
             continue
         move = random.sample(observation['legal_actions'], 1)[0]

     return env.get_payoffs()
Exemplo n.º 2
0
    def default_policy(self, node: MPMCTSTreeNode, env: Env):
        """Play ``env`` to the end using the per-player DRQN agents.

        Each move is the first element of the pair returned by
        ``self.drqn_agents[player].eval_step(state)``. The first state comes
        from ``env.get_state`` for the current player; later states and
        player ids come from ``env.step``.

        Args:
            node: Tree node for the rollout; not used by this policy.
            env: Environment to advance; it is stepped in place.

        Returns:
            Whatever ``env.get_payoffs()`` returns once ``env.is_over()`` is true.
        """
        # Nothing to simulate when the game has already finished.
        if env.is_over():
            return env.get_payoffs()

        seat = env.get_player_id()
        observation = env.get_state(seat)
        move, _ = self.drqn_agents[seat].eval_step(observation)

        while not env.is_over():
            # NOTE(review): the meaning of the second argument (False) is not
            # visible from here - confirm against Env.step.
            observation, seat = env.step(move, False)
            if env.is_over():
                # Terminal state reached; the loop condition ends the rollout.
                continue
            # Ask the agent of whichever player moves next.
            move, _ = self.drqn_agents[seat].eval_step(observation)

        return env.get_payoffs()