Пример #1
0
    def rollout_policy(self, state):
        """Pick the action to take from ``state`` during a rollout.

        The raw state is first converted with ``self.getStateRepresentation``
        and then handed to ``BaseDynaAgent.policy``; the index that call
        returns is used to look up the concrete action in
        ``self.action_list``.

        Args:
            state: Raw state, in whatever form ``getStateRepresentation``
                accepts.

        Returns:
            The entry of ``self.action_list`` selected by the base policy.
        """
        # A uniformly random rollout is the alternative the original author
        # left disabled: random.choice(self.action_list).
        encoded_state = self.getStateRepresentation(state)

        # The original comment labels this the DQN policy. The result must
        # support ``.item()`` (presumably a single-element tensor -- confirm
        # against BaseDynaAgent.policy).
        chosen_index = BaseDynaAgent.policy(self, encoded_state)

        return self.action_list[chosen_index.item()]
Пример #2
0
 def policy(self, state):
     """Select an action, alternating between tree search and the base policy.

     When ``self.episode_counter % 2 == 1`` the method calls
     ``self.MCTS_iteration()`` ``self.num_iterations`` times, keeps the
     ``(action, sub_tree)`` pair from the last call, stores the sub-tree in
     ``self.subtree_node`` and returns the action's index as a tensor.
     Otherwise it defers to ``BaseDynaAgent.policy``.

     Args:
         state: Current state; only used on the base-policy path.

     Returns:
         On the search path, the result of ``self.getActionIndex`` wrapped in
         a one-element array, converted to a tensor with a leading dimension
         added and moved to ``self.device``. On the other path, whatever
         ``BaseDynaAgent.policy`` returns.
     """
     if self.episode_counter % 2 != 1:
         return BaseDynaAgent.policy(self, state)

     # If num_iterations is 0 the loop never runs, so both names stay None
     # and None is what gets passed to getActionIndex below.
     chosen_action, chosen_subtree = None, None
     for _ in range(self.num_iterations):
         chosen_action, chosen_subtree = self.MCTS_iteration()
     # Debug aid left disabled by the original author: self.render_tree()
     self.subtree_node = chosen_subtree

     index_array = np.array([self.getActionIndex(chosen_action)])
     return torch.from_numpy(index_array).unsqueeze(0).to(self.device)