示例#1
0
 def step(self, action):
     if self.if_render:
         self.render()
     # self.set_xyz_action_rot(action[:7])
     if self.rotMode == 'euler':
         action_ = np.zeros(7)
         action_[:3] = action[:3]
         action_[3:] = euler2quat(action[3:6])
         self.set_xyz_action_rot(action_)
     elif self.rotMode == 'fixed':
         self.set_xyz_action(action[:3])
     elif self.rotMode == 'rotz':
         self.set_xyz_action_rotz(action[:4])
     else:
         self.set_xyz_action_rot(action[:7])
     self.do_simulation([action[-1], -action[-1]])
     # The marker seems to get reset every time you do a simulation
     self._set_goal_marker(self._state_goal)
     ob = self._get_obs()
     obs_dict = self._get_obs_dict()
     reward, reachDist, pushDist = self.compute_reward(action, obs_dict)
     self.curr_path_length += 1
     #info = self._get_info()
     if self.curr_path_length == self.max_path_length:
         done = True
     else:
         done = False
     return ob, reward, done, {
         'reachDist': reachDist,
         'goalDist': pushDist,
         'epRew': reward,
         'pickRew': None,
         'success': float(pushDist <= 0.07)
     }
 def step(self, action):
     if self.if_render:
         self.render()
     # self.set_xyz_action_rot(action[:7])
     if self.rotMode == 'euler':
         action_ = np.zeros(7)
         action_[:3] = action[:3]
         action_[3:] = euler2quat(action[3:6])
         self.set_xyz_action_rot(action_)
     elif self.rotMode == 'fixed':
         self.set_xyz_action(action[:3])
     elif self.rotMode == 'rotz':
         self.set_xyz_action_rotz(action[:4])
     else:
         self.set_xyz_action_rot(action[:7])
     self.do_simulation([action[-1], -action[-1]])
     # The marker seems to get reset every time you do a simulation
     ob = self._get_obs()
     obs_dict = self._get_obs_dict()
     reward, reachRew, reachDist, pickRew, hammerRew, hammerDist, screwDist = self.compute_reward(
         action, obs_dict, mode=self.rewMode)
     self.curr_path_length += 1
     if self.curr_path_length == self.max_path_length:
         done = True
     else:
         done = False
     # return ob, reward, done, { 'reachRew':reachRew, 'reachDist': reachDist, 'pickRew':pickRew,
     #                             'hammerRew': hammerRew, 'epRew' : reward, 'hammerDist': hammerDist, 'screwDist': screwDist}
     return ob, reward, done, {
         'reachDist': reachDist,
         'pickRew': pickRew,
         'epRew': reward,
         'goalDist': screwDist,
         'success': float(screwDist <= 0.05)
     }
示例#3
0
 def step(self, action):
     # self.set_xyz_action_rot(action[:7])
     if self.rotMode != 'quat':
         assert self.rotMode == 'euler'
         action_ = np.zeros(7)
         action_[:3] = action[:3]
         action_[3:] = euler2quat(action[3:6])
         self.set_xyz_action_rot(action_)
     else:
         self.set_xyz_action_rot(action[:7])
     self.do_simulation([action[-1], -action[-1]])
     # The marker seems to get reset every time you do a simulation
     self._set_goal_marker(self._state_goal)
     ob = self._get_obs()
     obs_dict = self._get_obs_dict()
     reward , reachRew, reachDist, pickRew, placeRew , placingDist = self.compute_reward(action, obs_dict, mode = self.rewMode)
     self.curr_path_length +=1
     #info = self._get_info()
     if self.curr_path_length == self.max_path_length:
         done = True
     else:
         done = False
     return ob, reward, done, { 'reachRew':reachRew, 'reachDist': reachDist, 'pickRew':pickRew, 'placeRew': placeRew, 'epRew' : reward, 'placingDist': placingDist}