def OnEpisodeFinishedCallback(
    self,
    env: Environment,
    brain: Brain,
    episode_idx: int,
    num_of_episodes: int,
    episode_reward: float,
    steps: int,
):
    """Append the brain's current values for the tracked states to the traces.

    Calls ``brain.GetValues(self._states)`` once, then walks the result by
    position. For every entry and every action in ``self._actions`` the
    entry's value for that action is appended to
    ``self._value_traces[action][position]``.

    Args:
        env: Not used by this callback.
        brain: Object queried through ``GetValues``.
        episode_idx: Not used by this callback.
        num_of_episodes: Not used by this callback.
        episode_reward: Not used by this callback.
        steps: Not used by this callback.
    """
    state_values = brain.GetValues(self._states)
    for state_pos, values_for_state in enumerate(state_values):
        for action in self._actions:
            # One trace per (action, state position) pair.
            trace = self._value_traces[action][state_pos]
            trace.append(values_for_state[action])
# Example #2
0
 def Decide(
     self,
     env: base.Environment,
     brain: base.Brain,
     state: base.State,
     episode_idx: int,
     num_of_episodes: int,
 ) -> base.Action:
     """Sample an action at random, weighted by the brain's output.

     Draws one choice index from ``range(env.GetActionSpaceSize())`` with
     ``numpy.random.choice``, passing the first element of
     ``brain.GetValues(state)`` as the ``p`` argument, and converts the
     index to an action through ``env.GetActionFromChoice``.

     NOTE(review): presumably ``brain.GetValues(state)[0]`` is a valid
     probability vector (non-negative, sums to 1) with one entry per
     action; ``numpy.random.choice`` raises otherwise. Confirm against
     the brain implementation.

     Args:
         env: Supplies the action-space size and the choice-to-action map.
         brain: Queried through ``GetValues`` for the sampling weights.
         state: State passed to ``brain.GetValues``.
         episode_idx: Not used by this method.
         num_of_episodes: Not used by this method.

     Returns:
         Whatever ``env.GetActionFromChoice`` returns for the sampled index.
     """
     action_count = env.GetActionSpaceSize()
     weights = brain.GetValues(state)[0]
     sampled_choice = numpy.random.choice(action_count, p=weights)
     return env.GetActionFromChoice(sampled_choice)
# Example #3
0
 def Decide(
     self,
     env: base.Environment,
     brain: base.Brain,
     state: base.State,
     episode_idx: int,
     num_of_episodes: int,
 ) -> base.Action:
     """Pick the action with the largest value reported by the brain.

     Takes ``numpy.argmax`` over ``brain.GetValues(state)``, converts the
     result to ``int``, logs the decision at verbosity level 20, and maps
     the index to an action through ``env.GetActionFromChoice``.

     ``numpy.argmax`` is called without an axis, so the index refers to
     the flattened values.

     Args:
         env: Supplies the choice-to-action map.
         brain: Queried through ``GetValues`` for the values of ``state``.
         state: State passed to ``brain.GetValues``.
         episode_idx: Not used by this method.
         num_of_episodes: Not used by this method.

     Returns:
         Whatever ``env.GetActionFromChoice`` returns for the argmax index.
     """
     action_values = brain.GetValues(state)
     best_choice = int(numpy.argmax(action_values))
     logging.vlog(
         20,
         'making greedy decision for state %s using values: %s; choice: %d',
         state,
         action_values,
         best_choice,
     )
     return env.GetActionFromChoice(best_choice)