def start(self): self.vrep_start() t = vrep.simxGetLastCmdTime(self.clientID) vrep.simxSynchronousTrigger(self.clientID) self.controller([0.0, 0.0]) while vrep.simxGetLastCmdTime(self.clientID) - t < self.dt: lrf_bin = vrep.simxGetStringSignal(self.clientID, 'hokuyo_data', vrep.simx_opmode_streaming)[1] pose = vrep.simxGetObjectPosition(self.clientID, self.body_handle, -1, vrep.simx_opmode_streaming)[1]
def step(self, action, return_obs=False): self.count += 1 self.controller(action) t = vrep.simxGetLastCmdTime(self.clientID) vrep.simxSynchronousTrigger(self.clientID) while vrep.simxGetLastCmdTime(self.clientID) - t < self.dt: pose = vrep.simxGetObjectPosition(self.clientID, self.body_handle, -1, vrep.simx_opmode_streaming)[1] orientation = vrep.simxGetObjectOrientation( self.clientID, self.body_handle, -1, vrep.simx_opmode_streaming)[1][2] goal_pos = vrep.simxGetObjectPosition( self.clientID, self.brick_handle, self.body_handle, vrep.simx_opmode_streaming)[1] joint_pose = [ vrep.simxGetJointPosition(self.clientID, self.joint_handles[idx], vrep.simx_opmode_streaming)[1] for idx in range(6) ] goal_dist = linalg.norm(goal_pos) state1 = joint_pose + goal_pos if self.goal_dist_prev != None: reward = self.reward(self.state0, goal_dist, action) self.goal_dist_prev = goal_dist self.reward_sum += reward sys.stderr.write( '\rstep:%d| goal:% 2.1f,% 2.1f | pose:% 2.1f,% 2.1f | avg.reward:% 4.2f' % (self.count, self.goal[0], self.goal[1], pose[0], pose[1], self.reward_sum / self.count)) if goal_dist < 0.1: done = 1 print(' | Success') else: done = 0 if self.state0 != None: self.replay.add({ 'state0': self.state0, 'action0': action, 'reward': reward, 'state1': state1, 'done': done }) self.state0 = state1 self.action_prev = action self.goal_dist_prev = goal_dist if return_obs: obs = {'state': state1, 'action': action, 'reward': reward} return state1, obs, done else: return state1, done
def step(self, action, return_obs=False): self.count += 1 self.controller(action) t = vrep.simxGetLastCmdTime(self.clientID) vrep.simxSynchronousTrigger(self.clientID) while vrep.simxGetLastCmdTime(self.clientID) - t < self.dt: pose = vrep.simxGetObjectPosition(self.clientID, self.body_handle, -1, vrep.simx_opmode_streaming)[1] orientation = vrep.simxGetObjectOrientation( self.clientID, self.body_handle, -1, vrep.simx_opmode_streaming)[1][2] goal_pos = vrep.simxGetObjectPosition( self.clientID, self.goal_handle, self.body_handle, vrep.simx_opmode_streaming)[1][1:3] lrf_bin = vrep.simxGetStringSignal(self.clientID, 'hokuyo_data', vrep.simx_opmode_streaming)[1] lrf = array(vrep.simxUnpackFloats(lrf_bin), dtype=float) / 5.578 goal_dist = linalg.norm(goal_pos) goal_angle = arctan2(-goal_pos[0], goal_pos[1]) state1 = list(lrf) + [action[0] * 2, action[1]] state1 += [goal_dist/5.578, goal_angle/pi] \ if goal_dist<5.578 else \ [1, goal_angle/pi] if self.goal_dist_prev != None: reward = self.reward(lrf, goal_dist, action) self.goal_dist_prev = goal_dist self.reward_sum += reward sys.stderr.write( '\rstep:%d| goal:% 2.1f, % 2.1f | pose:% 2.1f, % 2.1f | avg.reward:% 4.2f' % (self.count, self.goal[0], self.goal[1], pose[0], pose[1], self.reward_sum / self.count)) if min(lrf) < 0.0358: done = 1 print(' | Fail') elif goal_dist < 0.1: done = 1 print(' | Success') else: done = 0 if self.state0 != None: self.replay.add({ 'state0': self.state0, 'action0': action, 'reward': reward, 'state1': state1, 'done': done }) self.state0 = state1 self.action_prev = action self.goal_dist_prev = goal_dist if return_obs: obs = { 'state': state1, 'lidar': list(lrf), 'action': action, 'pose': [pose[0], pose[1], orientation], 'goal': [goal_dist, goal_angle], 'reward': reward, 'time': t + self.dt } return state1, obs, done else: return state1, done