Exemplo n.º 1
0
 def true_values_for_sample(self, states, actions, assume_optimal_policy: bool):
     """Delegate to ``GridworldBase.true_values_for_sample`` with decoded actions.

     Every entry of ``actions`` is first passed through
     ``self.features_to_action``; ``states`` and ``assume_optimal_policy``
     are forwarded unchanged. The result is whatever the base
     implementation returns.
     """
     # presumably features_to_action yields the string action names the
     # base class works with -- confirm against its definition.
     decoded_actions = [self.features_to_action(raw) for raw in actions]
     return GridworldBase.true_values_for_sample(
         self, states, decoded_actions, assume_optimal_policy
     )
Exemplo n.º 2
0
 def true_values_for_sample(self, states, actions, assume_optimal_policy: bool):
     """Translate ``actions`` and defer to the ``GridworldBase`` implementation.

     Each action is converted with ``self.features_to_action`` before the
     call; ``states`` and ``assume_optimal_policy`` are passed through as
     given. Returns the base implementation's result unchanged.
     """
     converted = [self.features_to_action(encoded) for encoded in actions]
     return GridworldBase.true_values_for_sample(
         self, states, converted, assume_optimal_policy
     )
 def transition_probabilities(self, state, action) -> np.ndarray:
     """Return the next-state distribution for taking ``action`` in ``state``.

     The cheat action ``"C"`` is deterministic: all probability mass is put
     on the state returned by ``self._cheat_step(state)``, in a vector with
     ``self.width * self.height`` entries. Every other action is handled by
     ``GridworldBase.transition_probabilities``.
     """
     if action == "C":
         target = self._cheat_step(state)
         # One slot per grid cell; one-hot at the cheat destination.
         distribution = np.zeros(self.width * self.height)
         distribution[target] = 1
         return distribution
     return GridworldBase.transition_probabilities(self, state, action)
 def true_values_for_sample(self, states, actions,
                            assume_optimal_policy: bool):
     """Decode mapping-style actions and defer to ``GridworldBase``.

     Each action is a mapping; its first key is converted to ``int``,
     offset by ``self.num_states`` and used as an index into
     ``self.ACTIONS``. The decoded actions are then passed, together with
     ``states`` and ``assume_optimal_policy``, to
     ``GridworldBase.true_values_for_sample``, whose result is returned.
     """

     def _decode(action):
         # presumably action feature ids are numbered right after the
         # state feature ids, hence the num_states offset -- confirm.
         first_key = list(action.keys())[0]
         return self.ACTIONS[int(first_key) - self.num_states]

     decoded_actions = [_decode(action) for action in actions]
     return GridworldBase.true_values_for_sample(
         self, states, decoded_actions, assume_optimal_policy)
Exemplo n.º 5
0
 def possible_next_actions(self, state, ignore_terminal=False) -> List[str]:
     """List the actions available in ``state``, including the cheat action.

     When ``ignore_terminal`` is exactly ``False`` and ``state`` is terminal,
     an empty list is returned. Otherwise the actions come from
     ``GridworldBase.possible_next_actions``, and ``'C'`` is appended only
     when ``ignore_terminal`` is exactly ``False``.
     """
     honor_terminal = ignore_terminal is False
     if honor_terminal and self.is_terminal(state):
         return []
     available = GridworldBase.possible_next_actions(
         self, state, ignore_terminal)
     if honor_terminal:
         # The cheat action is left out when terminal states are ignored.
         # Note: this appends to the list object returned by the base class.
         available.append('C')
     return available
Exemplo n.º 6
0
 def step(self,
          action: str,
          with_possible=True) -> Tuple[int, float, bool, List[str]]:
     """Advance the environment by one action.

     For the cheat action ``'C'`` the current state is replaced by
     ``self._cheat_step(self._state)`` and the method returns the tuple
     ``(new_state, reward, is_terminal, possible_next_actions)`` computed
     for that new state. Any other action is delegated to
     ``GridworldBase.step``.

     NOTE(review): ``with_possible`` is accepted but not consulted here,
     and it is not forwarded to ``GridworldBase.step`` -- confirm whether
     that is intended.
     """
     if action != 'C':
         return GridworldBase.step(self, action)
     self._state: int = self._cheat_step(self._state)
     reward = self.reward(self._state)
     next_actions = self.possible_next_actions(self._state)
     terminal = self.is_terminal(self._state)
     return self._state, reward, terminal, next_actions
Exemplo n.º 7
0
 def true_rewards_for_sample(self, states, actions):
     """Delegate to ``GridworldBase.true_rewards_for_sample`` with decoded actions.

     Every entry of ``actions`` is passed through
     ``self.features_to_action`` first; ``states`` is forwarded unchanged.
     Returns whatever the base implementation returns.
     """
     decoded_actions = [self.features_to_action(raw) for raw in actions]
     return GridworldBase.true_rewards_for_sample(self, states, decoded_actions)
Exemplo n.º 8
0
 def true_rewards_for_sample(self, states, actions):
     """Translate ``actions`` and defer to the ``GridworldBase`` implementation.

     Each action is converted with ``self.features_to_action`` before being
     handed, together with ``states``, to
     ``GridworldBase.true_rewards_for_sample``. The base result is returned
     as-is.
     """
     converted = [self.features_to_action(encoded) for encoded in actions]
     return GridworldBase.true_rewards_for_sample(self, states, converted)