def forward(self, joint_angles): # Calculate writhe right_limb_pose, _ = limbPose(self._kdl_tree, self._base_link, self._right_limb_interface, joint_angles) # left_limb_pose, _ = limbPose(self._kdl_tree, self._base_link, self._left_limb_interface, joint_angles, 'left') writhe = np.empty((len(self.target_line) - 2, 14)) for idx_target in range(10): for idx_robot in range(5, 12): x1_right = self.target_line[idx_target].copy() x2_right = self.target_line[idx_target + 1].copy() x1_right[1] -= 0.15 x2_right[1] -= 0.15 writhe[idx_target, idx_robot - 5] = GLI(x1_right, x2_right, right_limb_pose[idx_robot], right_limb_pose[idx_robot + 1])[0] # x1_left = self.target_line[idx_target].copy() # x2_left = self.target_line[idx_target + 1].copy() # x1_left[1] += 0.15 # x2_left[1] += 0.15 # writhe[idx_target, idx_robot - 5 + 7] = GLI(x1_left, x2_left, # left_limb_pose[idx_robot], left_limb_pose[idx_robot + 1])[0] for idx_target in range(11, 21): for idx_robot in range(5, 12): writhe[idx_target - 1, idx_robot - 5] = \ GLI(self.target_line[idx_target], self.target_line[idx_target + 1], right_limb_pose[idx_robot], right_limb_pose[idx_robot + 1])[0] # writhe[idx_target - 1, idx_robot - 5 + 7] = \ # GLI(self.target_line[idx_target], self.target_line[idx_target + 1], # left_limb_pose[idx_robot], left_limb_pose[idx_robot + 1])[0] w_right1 = np.abs(writhe[0:10, 0:7].flatten().sum()) w_right2 = np.abs(writhe[10:20, 0:7].flatten().sum()) # w_left1 = np.abs(writhe[0:10, 7:14].flatten().sum()) # w_left2 = np.abs(writhe[10:20, 7:14].flatten().sum()) w = w_right1 + w_right2 return w
def getstate(self): rospy.wait_for_service('/gazebo/get_link_state') torso_pose = self.get_link_state("humanoid::Torso_link", "world").link_state.pose T = tf.transformations.quaternion_matrix([ torso_pose.orientation.x, torso_pose.orientation.y, torso_pose.orientation.z, torso_pose.orientation.w ]) # rotation in /world frame, which is [0, 0, -0.93] to /base frame # target_line_start-target_pos_start is the original position of human in /world frame self.target_line = np.dot(T[:3, :3], (self.target_line_start - self.target_pos_start).T).T + \ [torso_pose.position.x, torso_pose.position.y, torso_pose.position.z] + \ [0, 0, -0.93] right_limb_pose, right_joint_pos = limbPose(self.kdl_tree, self.base_link, self.right_limb_interface, 'right') left_limb_pose, left_joint_pos = limbPose(self.kdl_tree, self.base_link, self.left_limb_interface, 'left') right_joint = [ right_joint_pos[0], right_joint_pos[1], right_joint_pos[2], right_joint_pos[3], right_joint_pos[4], right_joint_pos[5], right_joint_pos[6] ] left_joint = [ left_joint_pos[0], left_joint_pos[1], left_joint_pos[2], left_joint_pos[3], left_joint_pos[4], left_joint_pos[5], left_joint_pos[6] ] # right limb joint positions state1 = np.asarray([right_joint, left_joint]).flatten() # right limb link cartesian positions state2 = np.asarray([right_limb_pose[5:], left_limb_pose[5:]]).flatten() # hugging target -- cylinder # aa = np.asarray([self.cylinder_radius]) # bb = np.asarray([self.cylinder1, self.cylinder1 + asarray([0, 0, self.cylinder_height])]).flatten() # state3 = np.concatenate((aa, bb), axis=0) state3 = self.target_line.flatten() # ####################### writhe matrix ########################### writhe = np.empty((len(self.target_line) - 2, 14)) for idx_target in range(10): for idx_robot in range(5, 12): writhe[idx_target, idx_robot-5] = \ GLI(self.target_line[idx_target], self.target_line[idx_target+1], right_limb_pose[idx_robot], right_limb_pose[idx_robot+1])[0] writhe[idx_target, idx_robot-5+7] = \ GLI(self.target_line[idx_target], self.target_line[idx_target+1], left_limb_pose[idx_robot], left_limb_pose[idx_robot+1])[0] for idx_target in range(11, 21): for idx_robot in range(5, 12): writhe[idx_target-1, idx_robot-5] = \ GLI(self.target_line[idx_target], self.target_line[idx_target+1], right_limb_pose[idx_robot], right_limb_pose[idx_robot+1])[0] writhe[idx_target-1, idx_robot-5+7] = \ GLI(self.target_line[idx_target], self.target_line[idx_target+1], left_limb_pose[idx_robot], left_limb_pose[idx_robot+1])[0] state4 = writhe.flatten() # ############################### interaction mesh ################################## graph_points = np.concatenate( (right_limb_pose[5:], left_limb_pose[5:], self.target_line), 0) InterMesh = np.empty(graph_points.shape) mesh1 = [] mesh2 = [] for idx, point in enumerate(graph_points): neighbor_index = find_neighbors(idx, self.triangulation) W = 0 Lap = point # calculate normalization constant for nei_point in graph_points[neighbor_index]: W = W + 1.0 / math.sqrt((nei_point[0] - point[0])**2 + (nei_point[1] - point[1])**2 + (nei_point[2] - point[2])**2) # calculate Laplace coordinates for nei_point in graph_points[neighbor_index]: mesh1.append(point) mesh2.append(nei_point) dis_nei = math.sqrt((nei_point[0] - point[0])**2 + (nei_point[1] - point[1])**2 + (nei_point[2] - point[2])**2) Lap = Lap - nei_point / (dis_nei * W) InterMesh[idx] = Lap state5 = InterMesh.flatten() # DisMesh = np.empty([limb_pose.shape[0], self.target_line.shape[0]]) # for i in range(DisMesh.shape[0]): # for j in range(DisMesh.shape[1]): # DisMesh[i][j] = math.sqrt( (limb_pose[i][0] - self.target_line[j][0]) ** 2 + # (limb_pose[i][1] - self.target_line[j][1]) ** 2 + # (limb_pose[i][2] - self.target_line[j][2]) ** 2) # state5 = DisMesh.flatten() # # DisMesh = np.empty(limb_pose.shape) # for i in range(DisMesh.shape[0]): # W = 0 # Lap = limb_pose[i] # # calculate normalization constant # for target_point in self.target_line: # W = W + 1.0 / math.sqrt((limb_pose[i][0] - target_point[0])**2 + # (limb_pose[i][1] - target_point[1])**2 + # (limb_pose[i][2] - target_point[2])**2) # for target_point in self.target_line: # dis = math.sqrt((limb_pose[i][0] - target_point[0])**2 + # (limb_pose[i][1] - target_point[1])**2 + # (limb_pose[i][2] - target_point[2])**2) # Lap = Lap - target_point / (dis * W) # DisMesh[i] = Lap # state5 = DisMesh.flatten() state = [state1, state2, state3, state4, state5] return state, writhe, InterMesh, [mesh1, mesh2]
def reward_evaluation(self, w_last, step): rospy.wait_for_service('/gazebo/get_link_state') torso_pose = self.get_link_state("humanoid::Torso_link", "world").link_state.pose T = tf.transformations.quaternion_matrix([ torso_pose.orientation.x, torso_pose.orientation.y, torso_pose.orientation.z, torso_pose.orientation.w ]) # rotation in /world frame, which is [0, 0, -0.93] to /base frame # target_line_start-target_pos_start is the original position of human in /world frame self.target_line = np.dot(T[:3,:3], (self.target_line_start-self.target_pos_start).T).T + \ [torso_pose.position.x, torso_pose.position.y, torso_pose.position.z] + \ [0, 0, -0.93] # Calculate writhe improvement rospy.sleep(0.01) right_limb_pose, _ = limbPose(self.kdl_tree, self.base_link, self.right_limb_interface, 'right') left_limb_pose, _ = limbPose(self.kdl_tree, self.base_link, self.left_limb_interface, 'left') writhe = np.empty((len(self.target_line) - 2, 14)) for idx_target in range(10): for idx_robot in range(5, 12): x1_right = self.target_line[idx_target].copy() x2_right = self.target_line[idx_target + 1].copy() x1_right[1] -= 0.15 x2_right[1] -= 0.15 writhe[idx_target, idx_robot - 5] = GLI(x1_right, x2_right, right_limb_pose[idx_robot], right_limb_pose[idx_robot + 1])[0] x1_left = self.target_line[idx_target].copy() x2_left = self.target_line[idx_target + 1].copy() x1_left[1] += 0.15 x2_left[1] += 0.15 writhe[idx_target, idx_robot - 5 + 7] = GLI( x1_left, x2_left, left_limb_pose[idx_robot], left_limb_pose[idx_robot + 1])[0] # writhe[idx_target, idx_robot-5] = \ # GLI(self.target_line[idx_target], self.target_line[idx_target+1], # right_limb_pose[idx_robot], right_limb_pose[idx_robot+1])[0] # writhe[idx_target, idx_robot-5+7] = \ # GLI(self.target_line[idx_target], self.target_line[idx_target+1], # left_limb_pose[idx_robot], left_limb_pose[idx_robot+1])[0] for idx_target in range(11, 21): for idx_robot in range(5, 12): writhe[idx_target-1, idx_robot-5] = \ GLI(self.target_line[idx_target], self.target_line[idx_target+1], right_limb_pose[idx_robot], right_limb_pose[idx_robot+1])[0] writhe[idx_target-1, idx_robot-5+7] = \ GLI(self.target_line[idx_target], self.target_line[idx_target+1], left_limb_pose[idx_robot], left_limb_pose[idx_robot+1])[0] w_right1 = np.abs(writhe[0:10, 0:7].flatten().sum()) w_right2 = np.abs(writhe[0:10, 7:14].flatten().sum()) w_left1 = np.abs(writhe[10:20, 0:7].flatten().sum()) w_left2 = np.abs(writhe[10:20, 7:14].flatten().sum()) w = w_right1 + w_right2 + w_left1 + w_left2 reward = (w - w_last) * 50 - 7.5 + w * 5 # Prevent robot arms above humanoid arms height_human = self.target_line[4][2] + 0.02 height_robot = (right_limb_pose[5:][:, 2].mean() + left_limb_pose[5:][:, 2].mean()) / 2.0 r2 = height_robot - height_human print r2 if r2 > 0: print( "!!!!!!!!!!!!!!!!!!!!!!!!\n!!!!!!!!!!!!!!!!!!!!!!!!\n!!!!!!!!!!!!!!!!!!!!!!!!" ) reward = reward - r2 # Detect collision collision = 0 # Listen to collision information # msg = self.collision_getter.get_msg() # if msg: # if msg.data == "cylinder_collision": # collision = 1 return reward, w, collision
def reward_evaluation(self, w_last, step): rospy.wait_for_service('/gazebo/get_link_state') torso_pose = self.get_link_state("humanoid::Torso_link", "world").link_state.pose T = tf.transformations.quaternion_matrix([torso_pose.orientation.x, torso_pose.orientation.y, torso_pose.orientation.z, torso_pose.orientation.w]) # rotation in /world frame, which is [0, 0, -0.93] to /base frame # target_line_start-target_pos_start is the original position of human in /world frame self.target_line = np.dot(T[:3,:3], (self.target_line_start-self.target_pos_start).T).T + \ [torso_pose.position.x, torso_pose.position.y, torso_pose.position.z] + \ [0, 0, -0.93] if step == 1: print("target_line:", self.target_line) # Calculate writhe improvement rospy.sleep(0.01) right_limb_pose, _ = limbPose(self.kdl_tree, self.base_link, self.right_limb_interface, 'right') left_limb_pose, _ = limbPose(self.kdl_tree, self.base_link, self.left_limb_interface, 'left') writhe = np.empty((len(self.target_line) - 2, 14)) for idx_target in range(10): for idx_robot in range(5, 12): x1_right = self.target_line[idx_target].copy() x2_right = self.target_line[idx_target + 1].copy() if self.reset_mode == 1 or self.reset_mode == 2 or self.reset_mode == 4: x1_right[1] -= 0.15 x2_right[1] -= 0.15 if self.reset_mode == 5: x1_right[1] += 0.15 x2_right[1] += 0.15 writhe[idx_target, idx_robot - 5] = GLI(x1_right, x2_right, right_limb_pose[idx_robot], right_limb_pose[idx_robot + 1])[0] x1_left = self.target_line[idx_target].copy() x2_left = self.target_line[idx_target + 1].copy() if self.reset_mode == 1 or self.reset_mode == 2 or self.reset_mode == 5: x1_left[1] += 0.15 x2_left[1] += 0.15 if self.reset_mode == 4: x1_left[1] -= 0.15 x2_left[1] -= 0.15 writhe[idx_target, idx_robot - 5 + 7] = GLI(x1_left, x2_left, left_limb_pose[idx_robot], left_limb_pose[idx_robot + 1])[0] # writhe[idx_target, idx_robot-5] = \ # GLI(self.target_line[idx_target], self.target_line[idx_target+1], # right_limb_pose[idx_robot], right_limb_pose[idx_robot+1])[0] # writhe[idx_target, idx_robot-5+7] = \ # GLI(self.target_line[idx_target], self.target_line[idx_target+1], # left_limb_pose[idx_robot], left_limb_pose[idx_robot+1])[0] for idx_target in range(11, 21): for idx_robot in range(5, 12): writhe[idx_target-1, idx_robot-5] = \ GLI(self.target_line[idx_target], self.target_line[idx_target+1], right_limb_pose[idx_robot], right_limb_pose[idx_robot+1])[0] writhe[idx_target-1, idx_robot-5+7] = \ GLI(self.target_line[idx_target], self.target_line[idx_target+1], left_limb_pose[idx_robot], left_limb_pose[idx_robot+1])[0] w_right_torso = np.abs(writhe[0:10, 0:7].flatten().sum()) w_left_torso = np.abs(writhe[0:10, 7:14].flatten().sum()) w_right_arm = np.abs(writhe[10:20, 0:7].flatten().sum()) w_left_arm = np.abs(writhe[10:20, 7:14].flatten().sum()) if self.reset_mode == 1 or self.reset_mode == 2: w = w_right_torso + w_right_arm + w_left_torso + w_left_arm if self.reset_mode == 4 or self.reset_mode == 5: w = w_right_torso + w_left_torso # writhe = np.empty((15, 14)) # for idx_target in range(5): # for idx_robot in range(5, 12): # x1_right = self.target_line[idx_target].copy() # x2_right = self.target_line[idx_target + 1].copy() # writhe[idx_target, idx_robot - 5] = GLI(x1_right, x2_right, # right_limb_pose[idx_robot], right_limb_pose[idx_robot + 1])[0] # x1_right[1] -= 0.15 # x2_right[1] -= 0.15 # writhe[idx_target + 5, idx_robot - 5] = GLI(x1_right, x2_right, # right_limb_pose[idx_robot], right_limb_pose[idx_robot + 1])[0] # x1_right[1] += 0.3 # x2_right[1] += 0.3 # writhe[idx_target + 10, idx_robot - 5] = GLI(x1_right, x2_right, # right_limb_pose[idx_robot], right_limb_pose[idx_robot + 1])[0] # x1_left = self.target_line[idx_target+5].copy() # x2_left = self.target_line[idx_target+5 + 1].copy() # writhe[idx_target, idx_robot - 5 + 7] = GLI(x1_left, x2_left, # left_limb_pose[idx_robot], left_limb_pose[idx_robot + 1])[0] # x1_left[1] -= 0.15 # x2_left[1] -= 0.15 # writhe[idx_target + 5, idx_robot - 5 + 7] = GLI(x1_left, x2_left, # left_limb_pose[idx_robot], left_limb_pose[idx_robot + 1])[0] # x1_left[1] += 0.3 # x2_left[1] += 0.3 # writhe[idx_target + 10, idx_robot - 5 + 7] = GLI(x1_left, x2_left, # left_limb_pose[idx_robot], left_limb_pose[idx_robot + 1])[0] # # writhe[idx_target, idx_robot-5] = \ # # GLI(self.target_line[idx_target], self.target_line[idx_target+1], # # right_limb_pose[idx_robot], right_limb_pose[idx_robot+1])[0] # # writhe[idx_target, idx_robot-5+7] = \ # # GLI(self.target_line[idx_target], self.target_line[idx_target+1], # # left_limb_pose[idx_robot], left_limb_pose[idx_robot+1])[0] # w_right1 = np.abs(writhe[0:5, 0:7].flatten().sum()) # w_right2 = np.abs(writhe[5:10, 0:7].flatten().sum()) # w_right3 = np.abs(writhe[10:15, 0:7].flatten().sum()) # w_left1 = np.abs(writhe[0:5, 7:14].flatten().sum()) # w_left2 = np.abs(writhe[5:10, 7:14].flatten().sum()) # w_left3 = np.abs(writhe[10:15, 7:14].flatten().sum()) # w = w_right1 + w_right2 + w_right3 + w_left1 + w_left2 + w_left3 reward = (w - w_last) * 50 #- 5 + w * 5 # Prevent robot arms above humanoid arms height_human = self.target_line[4][2] + 0.02 height_robot = (right_limb_pose[5:][:, 2].mean() + left_limb_pose[5:][:, 2].mean())/2.0 r2 = height_robot - height_human print r2 if r2 > 0: print("!!!!!!!!!!!!!!!!!!!!!!!!\n!!!!!!!!!!!!!!!!!!!!!!!!\n!!!!!!!!!!!!!!!!!!!!!!!!") reward = reward - r2 # Detect collision collision = 0 # current_pos = self.target_line[4] # target_move = math.hypot((current_pos[0] - self.target_pos_start[0]), # (current_pos[1] - self.target_pos_start[1])) # # print("target_move:" , target_move) # if target_move > 0.4: # # collision = 1 # collision # collision = 1 # # reward = 0.0 # if target_move > 0.2 and step <= 2: # collision = -1 # model load error # Listen to collision information # msg = self.collision_getter.get_msg() # if msg: # if msg.data == "cylinder_collision": # collision = 1 return reward, w, collision
def getstate(self): right_limb_pose, right_joint_pos = limbPose(self.kdl_tree, self.base_link, self.right_limb_interface, 'right') left_limb_pose, left_joint_pos = limbPose(self.kdl_tree, self.base_link, self.left_limb_interface, 'left') right_joint = [ right_joint_pos[0], right_joint_pos[1], right_joint_pos[2], right_joint_pos[3], right_joint_pos[4], right_joint_pos[5], right_joint_pos[6] ] left_joint = [ left_joint_pos[0], left_joint_pos[1], left_joint_pos[2], left_joint_pos[3], left_joint_pos[4], left_joint_pos[5], left_joint_pos[6] ] # right limb joint positions state1 = np.asarray([right_joint, left_joint]).flatten() # right limb link cartesian positions state2 = np.asarray([right_limb_pose[5:], left_limb_pose[5:]]).flatten() # hugging target -- cylinder # aa = np.asarray([self.cylinder_radius]) # bb = np.asarray([self.cylinder1, self.cylinder1 + asarray([0, 0, self.cylinder_height])]).flatten() # state3 = np.concatenate((aa, bb), axis=0) state3 = self.target_line.flatten() # ####################### writhe matrix ########################### writhe = np.empty((len(self.target_line) - 2, 14)) for idx_target in range(10): for idx_robot in range(5, 12): writhe[idx_target, idx_robot-5] = \ GLI(self.target_line[idx_target], self.target_line[idx_target+1], right_limb_pose[idx_robot], right_limb_pose[idx_robot+1])[0] writhe[idx_target, idx_robot-5+7] = \ GLI(self.target_line[idx_target], self.target_line[idx_target+1], left_limb_pose[idx_robot], left_limb_pose[idx_robot+1])[0] for idx_target in range(11, 21): for idx_robot in range(5, 12): writhe[idx_target-1, idx_robot-5] = \ GLI(self.target_line[idx_target], self.target_line[idx_target+1], right_limb_pose[idx_robot], right_limb_pose[idx_robot+1])[0] writhe[idx_target-1, idx_robot-5+7] = \ GLI(self.target_line[idx_target], self.target_line[idx_target+1], left_limb_pose[idx_robot], left_limb_pose[idx_robot+1])[0] state4 = writhe.flatten() # ############################### interaction mesh ################################## graph_points = np.concatenate( (right_limb_pose[5:], left_limb_pose[5:], self.target_line), 0) InterMesh = np.empty(graph_points.shape) for idx, point in enumerate(graph_points): neighbor_index = find_neighbors(idx, self.triangulation) W = 0 Lap = point # calculate normalization constant for nei_point in graph_points[neighbor_index]: W = W + 1.0 / math.sqrt((nei_point[0] - point[0])**2 + (nei_point[1] - point[1])**2 + (nei_point[2] - point[2])**2) # calculate Laplace coordinates for nei_point in graph_points[neighbor_index]: dis_nei = math.sqrt((nei_point[0] - point[0])**2 + (nei_point[1] - point[1])**2 + (nei_point[2] - point[2])**2) Lap = Lap - nei_point / (dis_nei * W) InterMesh[idx] = Lap state5 = InterMesh.flatten() state = [state1, state2, state3, state4, state5] return state, writhe, InterMesh
def reward_evaluation(self, w_last, step): # Calculate writhe improvement rospy.sleep(0.01) right_limb_pose, _ = limbPose(self.kdl_tree, self.base_link, self.right_limb_interface, 'right') left_limb_pose, _ = limbPose(self.kdl_tree, self.base_link, self.left_limb_interface, 'left') writhe = np.empty((len(self.target_line) - 2, 14)) for idx_target in range(10): for idx_robot in range(5, 12): x1_right = self.target_line[idx_target].copy() x2_right = self.target_line[idx_target + 1].copy() x1_right[1] -= 0.15 x2_right[1] -= 0.15 writhe[idx_target, idx_robot - 5] = GLI(x1_right, x2_right, right_limb_pose[idx_robot], right_limb_pose[idx_robot + 1])[0] x1_left = self.target_line[idx_target].copy() x2_left = self.target_line[idx_target + 1].copy() x1_left[1] += 0.15 x2_left[1] += 0.15 writhe[idx_target, idx_robot - 5 + 7] = GLI( x1_left, x2_left, left_limb_pose[idx_robot], left_limb_pose[idx_robot + 1])[0] # writhe[idx_target, idx_robot-5] = \ # GLI(self.target_line[idx_target], self.target_line[idx_target+1], # right_limb_pose[idx_robot], right_limb_pose[idx_robot+1])[0] # writhe[idx_target, idx_robot-5+7] = \ # GLI(self.target_line[idx_target], self.target_line[idx_target+1], # left_limb_pose[idx_robot], left_limb_pose[idx_robot+1])[0] for idx_target in range(11, 21): for idx_robot in range(5, 12): writhe[idx_target-1, idx_robot-5] = \ GLI(self.target_line[idx_target], self.target_line[idx_target+1], right_limb_pose[idx_robot], right_limb_pose[idx_robot+1])[0] writhe[idx_target-1, idx_robot-5+7] = \ GLI(self.target_line[idx_target], self.target_line[idx_target+1], left_limb_pose[idx_robot], left_limb_pose[idx_robot+1])[0] w_right1 = np.abs(writhe[0:10, 0:7].flatten().sum()) w_right2 = np.abs(writhe[0:10, 7:14].flatten().sum()) w_left1 = np.abs(writhe[10:20, 0:7].flatten().sum()) w_left2 = np.abs(writhe[10:20, 7:14].flatten().sum()) w = w_right1 + w_right2 + w_left1 + w_left2 reward = (w - w_last) * 50 - 7.5 + w * 5 # Detect collision collision = 0 current_pos = self.target_line[4] target_move = math.hypot((current_pos[0] - self.target_pos_start[0]), (current_pos[1] - self.target_pos_start[1])) # print("state_pose:", current_pos, "#########") print("target_move:", target_move) # if target_move > 0.4: # # collision = 1 # collision # collision = 1 # # reward = 0.0 # if target_move > 0.2 and step <= 2: # collision = -1 # model load error # Listen to collision information # msg = self.collision_getter.get_msg() # print("Collision massage:", msg) # if msg: # if msg.data == "cylinder_collision": # collision = 1 return reward, w, collision