def _update_observation(self):
    # Build an RL-Glue observation from the Nao's current effector positions.
    return_obs = Observation()
    return_obs.doubleArray = np.zeros(MDPState.nfeatures)
    for effector, mapping in MDPState.description.items():  # .iteritems() is Python 2-only
        pos = NaoWorldModel().get_effector_pos(effector)
        for key, axis in mapping.items():
            # Copy each tracked coordinate into its feature slot.
            return_obs.doubleArray[axis] = pos[MDPState.key_to_index(key)]
    return return_obs
def start(self):
    if self._random_start:
        # Sample a uniformly random state within the per-dimension limits:
        # scale uniform [0, 1) samples by the range, then shift by the lower bound.
        self._sensors = np.random.random(self._sensors.shape)
        self._sensors *= (self._limits[:, 1] - self._limits[:, 0])
        self._sensors += self._limits[:, 0]
    else:
        # Deterministic start: all sensors zero except the position, fixed at -0.5.
        self._sensors = np.zeros(self._sensors.shape)
        self._sensors[0] = -0.5
    self._render(self._sensors[0])

    return_obs = Observation()
    return_obs.doubleArray = self._sensors.tolist()
    return return_obs
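# A small standalone demonstration (not part of the class) of the random-start
# scaling used above: uniform samples in [0, 1) are affinely mapped into
# [low, high] per dimension. The bound values here are illustrative only.
#
#   import numpy as np
#
#   limits = np.array([[-1.2, 0.6],     # e.g. position bounds (illustrative)
#                      [-0.07, 0.07]])  # e.g. velocity bounds (illustrative)
#   sample = np.random.random(limits.shape[0])
#   state = limits[:, 0] + sample * (limits[:, 1] - limits[:, 0])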
def step(self, action):
    return_ro = Reward_observation_terminal()

    self._apply(action)
    self._render(self._sensors[0])

    return_ro.terminal = self._is_terminal()

    # Reward is -1 per step, 0 on reaching the terminal state,
    # optionally perturbed by zero-mean Gaussian noise.
    return_ro.r = -1.
    if return_ro.terminal:
        return_ro.r = .0
    if self._reward_noise > 0:
        return_ro.r += np.random.normal(scale=self._reward_noise)

    obs = Observation()
    obs.doubleArray = self._sensors.tolist()
    return_ro.o = obs

    return return_ro
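# A minimal sketch of how these methods might be driven in an episode loop.
# `env` is assumed to be an instance of this environment class, and `policy`
# is a hypothetical callable mapping an Observation to an action; both names
# are illustrative and not part of the original code.
def run_episode(env, policy, max_steps=1000):
    total_reward = 0.0
    obs = env.start()
    for _ in range(max_steps):
        # step() returns a Reward_observation_terminal carrying r, o, and terminal.
        ro = env.step(policy(obs))
        total_reward += ro.r
        obs = ro.o
        if ro.terminal:
            break
    return total_reward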