def _tick(self, ret_val):
    """Advance the internal step counter and optionally bound the action.

    Decrements ``self._ctr``; when it reaches zero, ``self._stage`` is
    incremented. If ``self._hp.action_bound`` is truthy, the entry
    ``ret_val['actions']`` is replaced by the result of
    ``truncate_movement``.

    Args:
        ret_val: mapping with an ``'actions'`` entry; modified in place
            when action bounding is enabled.

    Returns:
        The same ``ret_val`` object that was passed in.
    """
    self._ctr -= 1
    if self._ctr == 0:
        self._stage += 1

    if not self._hp.action_bound:
        return ret_val

    # truncate_movement is handed the action with an extra leading axis;
    # that axis is stripped again from its result.
    batched = ret_val['actions'][None]
    bounded = truncate_movement(batched, self._hp)
    ret_val['actions'] = bounded[0]
    return ret_val
def _process(self, actions):
    """Post-process actions according to the policy hyperparameters.

    Steps, in order:
      1. if ``self._hp.discrete_gripper`` is truthy, pass the actions
         through ``discretize_gripper`` with that setting;
      2. if ``self._hp.action_bound`` is truthy, pass them through
         ``truncate_movement``;
      3. repeat every entry along axis 0 ``self._hp.repeat`` times.

    Args:
        actions: array of actions to process.

    Returns:
        The processed array, repeated along its first axis.
    """
    hp = self._hp

    gripper_cfg = hp.discrete_gripper
    if gripper_cfg:
        actions = discretize_gripper(actions, gripper_cfg)

    if hp.action_bound:
        actions = truncate_movement(actions, hp)

    return np.repeat(actions, hp.repeat, axis=0)
def _default_sampler(self, mean, sigma, M):
    """Draw ``M`` action sequences from a multivariate normal distribution.

    Each flat sample is reshaped to ``(self._hp.nactions, self._adim)``,
    optionally passed through ``truncate_movement`` (when
    ``self._hp.action_bound`` is truthy), and then every step is repeated
    ``self._hp.repeat`` times along axis 1.

    Args:
        mean: mean vector handed to ``np.random.multivariate_normal``.
        sigma: covariance matrix handed to ``np.random.multivariate_normal``.
        M: number of samples to draw.

    Returns:
        Array whose leading axis has length ``M`` and whose second axis has
        length ``self._hp.nactions * self._hp.repeat``.
    """
    hp = self._hp

    samples = np.random.multivariate_normal(mean, sigma, M).reshape(
        M, hp.nactions, self._adim
    )

    if hp.action_bound:
        samples = truncate_movement(samples, hp)

    return np.repeat(samples, hp.repeat, axis=1)
def sample_actions(self, mean, sigma, hp, M):
    """Draw ``M`` action sequences from a multivariate normal distribution.

    Each flat sample is reshaped to ``(self.naction_steps, self.adim)`` and
    then post-processed according to ``hp``:

      * ``hp.discrete_ind`` (if not ``None``): samples are passed through
        ``discretize`` together with that setting;
      * ``hp.action_bound`` (if truthy): samples are passed through
        ``truncate_movement``;
      * every step is repeated ``hp.repeat`` times along axis 1;
      * ``hp.add_zero_action`` (if truthy): the first of the ``M`` samples
        is overwritten with zeros.

    Args:
        mean: mean vector handed to ``np.random.multivariate_normal``.
        sigma: covariance matrix handed to ``np.random.multivariate_normal``.
        hp: hyperparameter object providing ``discrete_ind``,
            ``action_bound``, ``repeat`` and ``add_zero_action``.
        M: number of samples to draw.

    Returns:
        Array whose leading axis has length ``M`` and whose second axis has
        length ``self.naction_steps * hp.repeat``.
    """
    actions = np.random.multivariate_normal(mean, sigma, M)
    actions = actions.reshape(M, self.naction_steps, self.adim)

    # Identity check rather than ``!= None``: if discrete_ind is a NumPy
    # array, ``!=`` compares elementwise and the ``if`` would raise on the
    # ambiguous truth value of the resulting array.
    if hp.discrete_ind is not None:
        actions = discretize(actions, M, self.naction_steps, hp.discrete_ind)

    if hp.action_bound:
        actions = truncate_movement(actions, hp)

    actions = np.repeat(actions, hp.repeat, axis=1)

    if hp.add_zero_action:
        # Replace the first sampled sequence with an all-zero sequence.
        actions[0] = 0

    return actions