def observe(self, action: Dict[Any, types.NestedArray], next_timestep: dm_env.TimeStep):
    """Dispatch each entry of `action` to the actor registered under its key.

    For every key in `action`, a copy of `next_timestep` is built in which
    `observation` and `reward` are replaced by their entries under that key;
    all other timestep fields are carried over unchanged. The copy and the
    matching action are then handed to `self._actors[key].observe`.

    Args:
        action: Mapping from actor key to the action taken by that actor.
        next_timestep: Timestep whose `observation` and `reward` are
            indexable by the same keys as `action`.
    """
    for actor_id, actor_action in action.items():
        # Narrow the joint timestep down to this actor's own view.
        per_actor_timestep = next_timestep._replace(
            observation=next_timestep.observation[actor_id],
            reward=next_timestep.reward[actor_id],
        )
        self._actors[actor_id].observe(
            action=actor_action,
            next_timestep=per_actor_timestep,
        )
def select_rgb_observation(timestep: dm_env.TimeStep) -> dm_env.TimeStep:
    """Return `timestep` with its observation reduced to the RGB entry.

    The incoming observation is expected to be a tuple (or other indexable)
    whose first element is the RGB observation; that element becomes the
    whole observation of the returned timestep. All other fields are kept.
    """
    rgb_observation = timestep.observation[0]
    return timestep._replace(observation=rgb_observation)
def _process_timestep(self, timestep: dm_env.TimeStep) -> dm_env.TimeStep:
    """Run the timestep's observation through the stackers.

    `self._stackers` and `timestep.observation` are walked together with
    `tree.map_structure`; each stacker's `step` is called with the
    corresponding observation entry. The results replace the observation
    in the returned timestep; other fields are left as they were.
    """

    def _advance(stacker, frame):
        # Feed one observation entry to its paired stacker.
        return stacker.step(frame)

    stacked_observation = tree.map_structure(
        _advance, self._stackers, timestep.observation
    )
    return timestep._replace(observation=stacked_observation)
def _convert_timestep(self, timestep: dm_env.TimeStep) -> dm_env.TimeStep:
    """Return `timestep` with reward, discount and observation converted.

    Each of the three fields is passed through `_convert_value`. The
    converted observation is additionally transposed (`.T`) and flattened,
    so it presumably is an array-like value -- TODO confirm against
    `_convert_value`.
    """
    # Converted in the same order as the fields are later written back.
    reward = _convert_value(timestep.reward)
    discount = _convert_value(timestep.discount)
    observation = _convert_value(timestep.observation).T.flatten()
    return timestep._replace(
        reward=reward,
        discount=discount,
        observation=observation,
    )