Пример #1
0
 def true_values_for_sample(self, states, actions, assume_optimal_policy: bool):
     """Return true values for a sample, translating actions first.

     Every element of ``actions`` is passed through
     ``self.features_to_action`` and the resulting list is forwarded,
     together with ``states`` and ``assume_optimal_policy``, to
     ``GridworldBase.true_values_for_sample``.

     Args:
         states: Forwarded unchanged to the base implementation.
         actions: Iterable of actions in the form accepted by
             ``self.features_to_action``.
         assume_optimal_policy: Forwarded unchanged to the base
             implementation.

     Returns:
         Whatever ``GridworldBase.true_values_for_sample`` returns.
     """
     converted_actions = [
         self.features_to_action(raw_action) for raw_action in actions
     ]
     return GridworldBase.true_values_for_sample(
         self, states, converted_actions, assume_optimal_policy
     )
Пример #2
0
 def true_values_for_sample(self, states, actions, assume_optimal_policy: bool):
     """Delegate to ``GridworldBase`` after converting the actions.

     The base-class method is called explicitly with a list built by
     applying ``self.features_to_action`` to each entry of ``actions``,
     in iteration order.

     Args:
         states: Passed through to the base-class method as-is.
         actions: Iterable whose items are each converted with
             ``self.features_to_action``.
         assume_optimal_policy: Passed through to the base-class method
             as-is.

     Returns:
         The result of ``GridworldBase.true_values_for_sample``.
     """
     # Build the converted list in one pass, preserving input order.
     action_names = [self.features_to_action(item) for item in actions]
     return GridworldBase.true_values_for_sample(
         self, states, action_names, assume_optimal_policy
     )
 def true_values_for_sample(self, states, actions,
                            assume_optimal_policy: bool):
     """Look up each action in ``self.ACTIONS`` and delegate to the base class.

     For every entry of ``actions`` the first key returned by its
     ``keys()`` is converted to ``int``, reduced by ``self.num_states``,
     and used as an index into ``self.ACTIONS``. The looked-up values are
     then handed to ``GridworldBase.true_values_for_sample`` along with
     ``states`` and ``assume_optimal_policy``.

     Args:
         states: Forwarded unchanged to the base implementation.
         actions: Iterable of mapping-like objects exposing ``keys()``.
             NOTE(review): presumably each holds a single feature id
             offset by the number of states - confirm against the caller.
         assume_optimal_policy: Forwarded unchanged to the base
             implementation.

     Returns:
         Whatever ``GridworldBase.true_values_for_sample`` returns.
     """
     # Only the first key of each action is consulted; any others are ignored.
     resolved_actions = [
         self.ACTIONS[int(list(action.keys())[0]) - self.num_states]
         for action in actions
     ]
     return GridworldBase.true_values_for_sample(
         self, states, resolved_actions, assume_optimal_policy
     )