def _tick(self, ret_val):
     self._ctr -= 1
     if self._ctr == 0:
         self._stage += 1
     if self._hp.action_bound:
         ret_val['actions'] = truncate_movement(ret_val['actions'][None], self._hp)[0]
     return ret_val
示例#2
0
    def _process(self, actions):
        if self._hp.discrete_gripper:
            actions = discretize_gripper(actions, self._hp.discrete_gripper)
        if self._hp.action_bound:
            actions = truncate_movement(actions, self._hp)

        actions = np.repeat(actions, self._hp.repeat, axis=0)
        return actions
    def _default_sampler(self, mean, sigma, M):
        actions = np.random.multivariate_normal(mean, sigma, M)
        actions = actions.reshape(M, self._hp.nactions, self._adim)
        if self._hp.action_bound:
            actions = truncate_movement(actions, self._hp)

        actions = np.repeat(actions, self._hp.repeat, axis=1)
        return actions
    def sample_actions(self, mean, sigma, hp, M):
        """Draw ``M`` Gaussian action sequences and post-process them per ``hp``.

        Processing order:
          1. Sample ``M`` vectors from ``N(mean, sigma)`` and reshape them to
             ``(M, self.naction_steps, self.adim)``.
          2. If ``hp.discrete_ind`` is not ``None``, pass the batch to
             ``discretize`` (defined elsewhere).
          3. If ``hp.action_bound`` is truthy, pass the batch to
             ``truncate_movement`` (defined elsewhere).
          4. Repeat every step ``hp.repeat`` times along axis 1.
          5. If ``hp.add_zero_action`` is truthy, overwrite the first sample
             with zeros.

        Args:
            mean: Mean vector of the sampling distribution.
            sigma: Covariance matrix of the sampling distribution.
            hp: Hyper-parameter object providing ``discrete_ind``,
                ``action_bound``, ``repeat`` and ``add_zero_action``.
            M: Number of samples to draw.

        Returns:
            The processed action array; its leading axis has length ``M``.
        """
        actions = np.random.multivariate_normal(mean, sigma, M)
        actions = actions.reshape(M, self.naction_steps, self.adim)

        # Identity test rather than `!= None`: with a NumPy array the
        # inequality is evaluated element-wise and the `if` would raise
        # "truth value of an array is ambiguous".
        if hp.discrete_ind is not None:
            actions = discretize(actions, M, self.naction_steps,
                                 hp.discrete_ind)

        if hp.action_bound:
            actions = truncate_movement(actions, hp)
        actions = np.repeat(actions, hp.repeat, axis=1)

        if hp.add_zero_action:
            # Replace the first of the M samples with an all-zero sequence.
            actions[0] = 0

        return actions