示例#1
0
    def step(self, tau=None):
        if tau is None:
            tau = np.zeros(self.nj)
        if not isinstance(tau, np.ndarray):
            tau = tau.detach().cpu().numpy()

        zero_gains = np.zeros(self.nj)
        p.setJointMotorControlArray(self.robot_id,
                                    self.bullet_joint_ids,
                                    p.TORQUE_CONTROL,
                                    forces=tau,
                                    positionGains=zero_gains,
                                    velocityGains=zero_gains)
        p.stepSimulation()
        q, v = self.get_state()
        q = self.wrap(q)
        obs = np.concatenate((q, v))
        if self.cost is None:
            obs_reward = 0
            act_reward = 0
        else:
            cost = self.cost(torch.Tensor(obs), torch.Tensor(tau))
            obs_reward = -cost.obs.detach().cpu().numpy()
            act_reward = -cost.act.detach().cpu().numpy()

        reward = obs_reward + act_reward
        info = DotMap()
        info.obs_reward = obs_reward
        info.act_reward = act_reward
        info.reward = reward
        done = False
        return obs, reward, done, info