Example #1
 # Assumed imports for these snippets: `import sys`, `import vrep` (the
 # V-REP/CoppeliaSim legacy remote API bindings), and
 # `from numpy import array, arctan2, linalg, pi`.
 def start(self):
     self.vrep_start()
     t = vrep.simxGetLastCmdTime(self.clientID)
     vrep.simxSynchronousTrigger(self.clientID)
     self.controller([0.0, 0.0])  # zero command while the data streams warm up
     # Poll until one control interval (self.dt) of simulation time has
     # passed; the streaming opmode also initializes the streams, so the
     # first reads may return empty or stale values.
     while vrep.simxGetLastCmdTime(self.clientID) - t < self.dt:
         lrf_bin = vrep.simxGetStringSignal(self.clientID, 'hokuyo_data',
                                            vrep.simx_opmode_streaming)[1]
         pose = vrep.simxGetObjectPosition(self.clientID, self.body_handle,
                                           -1,  # -1: world frame
                                           vrep.simx_opmode_streaming)[1]
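
For context, a minimal sketch of how these methods could be driven from an episode loop. The environment class name `VrepEnv` and the `policy` callable are placeholders for illustration, not part of the source:

 # Hypothetical driver loop; VrepEnv and policy are placeholders.
 env = VrepEnv()
 env.start()
 state, done = env.step([0.0, 0.0])
 while not done:
     action = policy(state)          # e.g. the current actor network
     state, done = env.step(action)  # advances the sim by one control interval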
Example #2
 def step(self, action, return_obs=False):
     self.count += 1
     self.controller(action)
     t = vrep.simxGetLastCmdTime(self.clientID)
     vrep.simxSynchronousTrigger(self.clientID)
     # Poll the streamed values until one control interval has elapsed.
     while vrep.simxGetLastCmdTime(self.clientID) - t < self.dt:
         pose = vrep.simxGetObjectPosition(self.clientID, self.body_handle,
                                           -1,  # -1: world frame
                                           vrep.simx_opmode_streaming)[1]
         orientation = vrep.simxGetObjectOrientation(
             self.clientID, self.body_handle, -1,
             vrep.simx_opmode_streaming)[1][2]  # yaw component only
         # Goal (brick) position relative to the robot body frame.
         goal_pos = vrep.simxGetObjectPosition(
             self.clientID, self.brick_handle, self.body_handle,
             vrep.simx_opmode_streaming)[1]
         joint_pose = [
             vrep.simxGetJointPosition(self.clientID,
                                       self.joint_handles[idx],
                                       vrep.simx_opmode_streaming)[1]
             for idx in range(6)
         ]
     goal_dist = linalg.norm(goal_pos)
     state1 = joint_pose + goal_pos
     reward = 0.0  # no reward on the very first step (no previous distance yet)
     if self.goal_dist_prev is not None:
         reward = self.reward(self.state0, goal_dist, action)
         self.reward_sum += reward
     sys.stderr.write(
         '\rstep:%d| goal:% 2.1f,% 2.1f | pose:% 2.1f,% 2.1f | avg.reward:% 4.2f'
         % (self.count, self.goal[0], self.goal[1], pose[0], pose[1],
            self.reward_sum / self.count))
     if goal_dist < 0.1:  # within 10 cm of the goal
         done = 1
         print(' | Success')
     else:
         done = 0
     if self.state0 is not None:
         # Store the transition for off-policy learning.
         self.replay.add({
             'state0': self.state0,
             'action0': action,
             'reward': reward,
             'state1': state1,
             'done': done
         })
     self.state0 = state1
     self.action_prev = action
     self.goal_dist_prev = goal_dist
     if return_obs:
         obs = {'state': state1, 'action': action, 'reward': reward}
         return state1, obs, done
     else:
         return state1, done
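
The example continues with a second step() variant that appears to target a mobile-robot navigation task: the state is built from a normalized Hokuyo laser scan rather than joint positions, and an additional collision check can end the episode: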
 def step(self, action, return_obs=False):
     self.count += 1
     self.controller(action)
     t = vrep.simxGetLastCmdTime(self.clientID)
     vrep.simxSynchronousTrigger(self.clientID)
     # Poll the streamed values until one control interval has elapsed.
     while vrep.simxGetLastCmdTime(self.clientID) - t < self.dt:
         pose = vrep.simxGetObjectPosition(self.clientID, self.body_handle,
                                           -1,  # -1: world frame
                                           vrep.simx_opmode_streaming)[1]
         orientation = vrep.simxGetObjectOrientation(
             self.clientID, self.body_handle, -1,
             vrep.simx_opmode_streaming)[1][2]  # yaw component only
         # Goal position relative to the robot body (two components,
         # as indexed in the original).
         goal_pos = vrep.simxGetObjectPosition(
             self.clientID, self.goal_handle, self.body_handle,
             vrep.simx_opmode_streaming)[1][1:3]
         lrf_bin = vrep.simxGetStringSignal(self.clientID, 'hokuyo_data',
                                            vrep.simx_opmode_streaming)[1]
         # Normalize the laser scan by its 5.578 m maximum range.
         lrf = array(vrep.simxUnpackFloats(lrf_bin), dtype=float) / 5.578
     goal_dist = linalg.norm(goal_pos)
     goal_angle = arctan2(-goal_pos[0], goal_pos[1])
     # State: normalized scan, the applied action, normalized goal distance
     # (saturated at 1 beyond lidar range), and goal bearing.
     state1 = list(lrf) + [action[0] * 2, action[1]]
     if goal_dist < 5.578:
         state1 += [goal_dist / 5.578, goal_angle / pi]
     else:
         state1 += [1, goal_angle / pi]
     reward = 0.0  # no reward on the very first step (no previous distance yet)
     if self.goal_dist_prev is not None:
         reward = self.reward(lrf, goal_dist, action)
         self.reward_sum += reward
     sys.stderr.write(
         '\rstep:%d| goal:% 2.1f, % 2.1f | pose:% 2.1f, % 2.1f | avg.reward:% 4.2f'
         % (self.count, self.goal[0], self.goal[1], pose[0], pose[1],
            self.reward_sum / self.count))
     if min(lrf) < 0.0358:  # ~0.2 m after normalization: collision
         done = 1
         print(' | Fail')
     elif goal_dist < 0.1:  # within 10 cm of the goal
         done = 1
         print(' | Success')
     else:
         done = 0
     if self.state0 is not None:
         # Store the transition for off-policy learning.
         self.replay.add({
             'state0': self.state0,
             'action0': action,
             'reward': reward,
             'state1': state1,
             'done': done
         })
     self.state0 = state1
     self.action_prev = action
     self.goal_dist_prev = goal_dist
     if return_obs:
         obs = {
             'state': state1,
             'lidar': list(lrf),
             'action': action,
             'pose': [pose[0], pose[1], orientation],
             'goal': [goal_dist, goal_angle],
             'reward': reward,
             'time': t + self.dt
         }
         return state1, obs, done
     else:
         return state1, done
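
Both step() variants push transitions into self.replay through an add() method that takes a dict. The actual buffer implementation is not shown in the source; a minimal compatible sketch, assuming a fixed capacity and uniform random sampling:

 import random
 from collections import deque

 class ReplayBuffer:
     # Fixed-capacity FIFO buffer of transition dicts.
     def __init__(self, capacity=100000):
         self.buf = deque(maxlen=capacity)

     def add(self, transition):
         # transition: {'state0', 'action0', 'reward', 'state1', 'done'}
         self.buf.append(transition)

     def sample(self, batch_size):
         # Uniform random minibatch for off-policy updates.
         return random.sample(self.buf, batch_size)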