def rollout_policy(self, state):
    """Choose the rollout action by delegating to the base agent's policy.

    The incoming ``state`` is first converted with
    ``self.getStateRepresentation``. That representation is handed to
    ``BaseDynaAgent.policy``, whose result is read with ``.item()`` and used
    as an index into ``self.action_list``.

    Returns:
        The entry of ``self.action_list`` at the index chosen by the base
        policy.
    """
    # Disabled alternative (random policy):
    #   action = random.choice(self.action_list)
    # Active behaviour: the DQN's policy.
    state_repr = self.getStateRepresentation(state)
    index_holder = BaseDynaAgent.policy(self, state_repr)
    return self.action_list[index_holder.item()]
def policy(self, state):
    """Select an action, switching between MCTS and the base policy.

    When ``self.episode_counter % 2 == 1`` the action comes from tree search:
    ``self.MCTS_iteration`` is invoked ``self.num_iterations`` times and only
    the ``(action, sub_tree)`` pair from the final call is kept. The sub-tree
    is stored in ``self.subtree_node``; the action is mapped to its index with
    ``self.getActionIndex``, wrapped in a tensor with an extra leading
    dimension, and moved to ``self.device``.

    In every other case the call is forwarded to ``BaseDynaAgent.policy``
    with the unmodified ``state``.

    Returns:
        The tensor built from the MCTS action index, or whatever
        ``BaseDynaAgent.policy`` returns.
    """
    # Guard clause: episodes that do not use tree search fall straight
    # through to the base agent's policy.
    if self.episode_counter % 2 != 1:
        return BaseDynaAgent.policy(self, state)

    selected_action = subtree = None
    for _ in range(self.num_iterations):
        selected_action, subtree = self.MCTS_iteration()
    # self.render_tree()
    self.subtree_node = subtree

    index_array = np.array([self.getActionIndex(selected_action)])
    index_tensor = torch.from_numpy(index_array).unsqueeze(0)
    return index_tensor.to(self.device)