示例#1
0
 def observe(self, action: Dict[Any, types.NestedArray], next_timestep: dm_env.TimeStep):
     """Forward each agent's action and its slice of the timestep to its actor.

     Args:
         action: Mapping from agent key to the action recorded for that agent.
         next_timestep: Joint timestep whose `observation` and `reward`
             fields are indexed with the same keys as `action`.
     """
     for agent_id, agent_action in action.items():
         # Narrow the joint timestep to this agent; every field other than
         # observation and reward is carried over unchanged by `_replace`.
         agent_timestep = next_timestep._replace(
             observation=next_timestep.observation[agent_id],
             reward=next_timestep.reward[agent_id],
         )
         self._actors[agent_id].observe(action=agent_action, next_timestep=agent_timestep)
示例#2
0
def select_rgb_observation(timestep: dm_env.TimeStep) -> dm_env.TimeStep:
    """Return a copy of `timestep` whose observation is only its first element.

    The incoming observation is indexed at position 0 (the RGB entry of the
    observation tuple); all other timestep fields are left as they were.
    """
    rgb_frame = timestep.observation[0]
    return timestep._replace(observation=rgb_frame)
示例#3
0
 def _process_timestep(self, timestep: dm_env.TimeStep) -> dm_env.TimeStep:
     """Run every observation leaf through its matching stacker.

     `self._stackers` and `timestep.observation` are walked together with
     `tree.map_structure`; each stacker's `step` is called on the
     corresponding leaf and the results replace the timestep's observation.
     """
     def _advance(stacker, leaf):
         # One stacker per observation leaf; `step` yields the new leaf value.
         return stacker.step(leaf)

     stacked = tree.map_structure(_advance, self._stackers, timestep.observation)
     return timestep._replace(observation=stacked)
示例#4
0
 def _convert_timestep(self, timestep: dm_env.TimeStep) -> dm_env.TimeStep:
     """Return `timestep` with reward, discount and observation converted.

     Reward and discount are each passed through `_convert_value`. The
     observation is converted the same way, then transposed and flattened.
     """
     converted_reward = _convert_value(timestep.reward)
     converted_discount = _convert_value(timestep.discount)
     # Transpose before flattening so the flattened element order follows
     # the transposed layout (presumably an array-like value; confirm
     # against `_convert_value`).
     flat_observation = _convert_value(timestep.observation).T.flatten()
     return timestep._replace(
         reward=converted_reward,
         discount=converted_discount,
         observation=flat_observation,
     )