def step(self, tau=None): if tau is None: tau = np.zeros(self.nj) if not isinstance(tau, np.ndarray): tau = tau.detach().cpu().numpy() zero_gains = np.zeros(self.nj) p.setJointMotorControlArray(self.robot_id, self.bullet_joint_ids, p.TORQUE_CONTROL, forces=tau, positionGains=zero_gains, velocityGains=zero_gains) p.stepSimulation() q, v = self.get_state() q = self.wrap(q) obs = np.concatenate((q, v)) if self.cost is None: obs_reward = 0 act_reward = 0 else: cost = self.cost(torch.Tensor(obs), torch.Tensor(tau)) obs_reward = -cost.obs.detach().cpu().numpy() act_reward = -cost.act.detach().cpu().numpy() reward = obs_reward + act_reward info = DotMap() info.obs_reward = obs_reward info.act_reward = act_reward info.reward = reward done = False return obs, reward, done, info