def get_shortest_path(self, floor, source_world, target_world, entire_path=False): """ Get the shortest path from one point to another point. If any of the given point is not in the graph, add it to the graph and create an edge between it to its closest node. :param floor: floor number :param source_world: 2D source location in world reference frame (metric) :param target_world: 2D target location in world reference frame (metric) :param entire_path: whether to return the entire path """ assert self.build_graph, 'cannot get shortest path without building the graph' source_map = tuple(self.world_to_map(source_world)) target_map = tuple(self.world_to_map(target_world)) g = self.floor_graph[floor] if not g.has_node(target_map): nodes = np.array(g.nodes) closest_node = tuple(nodes[np.argmin( np.linalg.norm(nodes - target_map, axis=1))]) g.add_edge(closest_node, target_map, weight=l2_distance(closest_node, target_map)) if not g.has_node(source_map): nodes = np.array(g.nodes) closest_node = tuple(nodes[np.argmin( np.linalg.norm(nodes - source_map, axis=1))]) g.add_edge(closest_node, source_map, weight=l2_distance(closest_node, source_map)) path_map = np.array( nx.astar_path(g, source_map, target_map, heuristic=l2_distance)) path_world = self.map_to_world(path_map) geodesic_distance = np.sum( np.linalg.norm(path_world[1:] - path_world[:-1], axis=1)) path_world = path_world[::self.waypoint_interval] if not entire_path: path_world = path_world[:self.num_waypoints] num_remaining_waypoints = self.num_waypoints - path_world.shape[0] if num_remaining_waypoints > 0: remaining_waypoints = np.tile(target_world, (num_remaining_waypoints, 1)) path_world = np.concatenate((path_world, remaining_waypoints), axis=0) return path_world, geodesic_distance
def sample_initial_pose_and_target_pos(self, env): """ Sample robot initial pose and target position :param env: environment instance :return: initial pose and target position """ _, initial_pos = env.scene.get_random_point(floor=self.floor_num) max_trials = 100 dist = 0.0 for _ in range(max_trials): _, target_pos = env.scene.get_random_point(floor=self.floor_num) if env.scene.build_graph: _, dist = env.scene.get_shortest_path(self.floor_num, initial_pos[:2], target_pos[:2], entire_path=False) else: dist = l2_distance(initial_pos, target_pos) if self.target_dist_min < dist < self.target_dist_max: break if not (self.target_dist_min < dist < self.target_dist_max): print("WARNING: Failed to sample initial and target positions") initial_orn = np.array([0, 0, np.random.uniform(0, np.pi * 2)]) return initial_pos, initial_orn, target_pos
def reset_initial_and_target_pos(self): """ Reset initial_pos, initial_orn and target_pos through randomization The geodesic distance (or L2 distance if traversable map graph is not built) between initial_pos and target_pos has to be between [self.target_dist_min, self.target_dist_max] """ _, self.initial_pos = self.get_random_hotspot_floor(self.floor_num) max_trials = 100 dist = 0.0 for _ in range(max_trials): _, self.target_pos = self.get_random_hotspot_floor(self.floor_num) if self.scene.build_graph: path, dist = self.get_shortest_path(from_initial_pos=True) else: dist = l2_distance(self.initial_pos, self.target_pos) if self.target_dist_min < dist < self.target_dist_max: break if not (self.target_dist_min < dist < self.target_dist_max): print("WARNING: Failed to sample initial and target positions") # forward_vec = np.array([0.0, 1.0, 0.0]) # predict_vec = np.array([path[5][0] - path[0][0], path[5][1] - path[0][1], 0.0]) # rad = math.acos(np.dot(forward_vec, predict_vec / np.linalg.norm(predict_vec))) # quat = Quaternion(axis=(0.0, 0.0, 1.0), radians=rad) # self.initial_orn = np.array([0, 0, quat.radians]) self.initial_orn = np.array([0, 0, np.random.uniform(0, np.pi * 2)])
def get_reward(self, collision_links): reward = self.slack_reward # |slack_reward| = 0.01 per step new_normalized_potential = self.get_potential( ) / self.initial_potential potential_reward = self.normalized_potential - new_normalized_potential reward += potential_reward * self.potential_reward_weight # |potential_reward| ~= 0.1 per step self.normalized_potential = new_normalized_potential # electricity_reward = np.abs(self.robots[0].joint_speeds * self.robots[0].joint_torque).mean().item() electricity_reward = 0.0 reward += electricity_reward * self.electricity_reward_weight # |electricity_reward| ~= 0.05 per step # stall_torque_reward = np.square(self.robots[0].joint_torque).mean() stall_torque_reward = 0.0 reward += stall_torque_reward * self.stall_torque_reward_weight # |stall_torque_reward| ~= 0.05 per step collision_reward = -1.0 if -1 in collision_links else 0.0 reward += collision_reward * self.collision_reward_weight # |collision_reward| ~= 1.0 per step if collision # goal reached if l2_distance(self.target_pos, self.get_position_of_interest()) < self.dist_tol: reward += self.success_reward # |success_reward| = 10.0 per step return reward
def get_shortest_path(self, floor, source_world, target_world, entire_path=False): assert self.build_graph, 'cannot get shortest path without building the graph' source_map = tuple(self.world_to_map(source_world)) target_map = tuple(self.world_to_map(target_world)) g = self.floor_graph[floor] if not g.has_node(target_map): nodes = np.array(g.nodes) closest_node = tuple(nodes[np.argmin( np.linalg.norm(nodes - target_map, axis=1))]) g.add_edge(closest_node, target_map, weight=l2_distance(closest_node, target_map)) if not g.has_node(source_map): nodes = np.array(g.nodes) closest_node = tuple(nodes[np.argmin( np.linalg.norm(nodes - source_map, axis=1))]) g.add_edge(closest_node, source_map, weight=l2_distance(closest_node, source_map)) path_map = np.array( nx.astar_path(g, source_map, target_map, heuristic=l2_distance)) path_world = self.map_to_world(path_map) geodesic_distance = np.sum( np.linalg.norm(path_world[1:] - path_world[:-1], axis=1)) path_world = path_world[::self.waypoint_interval] if not entire_path: path_world = path_world[:self.num_waypoints] num_remaining_waypoints = self.num_waypoints - path_world.shape[0] if num_remaining_waypoints > 0: remaining_waypoints = np.tile(target_world, (num_remaining_waypoints, 1)) path_world = np.concatenate((path_world, remaining_waypoints), axis=0) return path_world, geodesic_distance
def get_l2_potential(self, env): """ Get potential based on L2 distance :param env: environment instance :return: L2 distance to the target position """ return l2_distance(env.robots[0].get_position()[:2], self.target_pos[:2])
def get_shortest_path(self, floor, source_world, target_world, entire_path=False): """ Get a trivial shortest path because the scene is empty """ logging.warning( 'WARNING: trying to compute the shortest path in EmptyScene (assuming empty space)') shortest_path = np.stack((source_world, target_world)) geodesic_distance = l2_distance(source_world, target_world) return shortest_path, geodesic_distance
def get_l2_potential(self, env): """ L2 distance to the goal :param env: environment instance :return: potential based on L2 distance to goal """ return l2_distance(env.robots[0].get_end_effector_position(), self.target_pos)
def after_simulation(self, cache, collision_links): """ Accumulate evaluation stats :param cache: cache returned from before_simulation :param collision_links: collisions from last time step """ old_robot_position = cache['robot_position'][:2] new_robot_position = self.robots[0].get_position()[:2] self.path_length += l2_distance(old_robot_position, new_robot_position)
def step(self, env): """ Perform task-specific step: step visualization and aggregate path length :param env: environment instance """ self.step_visualization(env) new_robot_pos = env.robots[0].get_position()[:2] self.path_length += l2_distance(self.robot_pos, new_robot_pos) self.robot_pos = new_robot_pos
def get_shortest_path(self, floor, source_world, target_world, entire_path=False): logging.warning( 'WARNING: trying to compute the shortest path in StadiumScene (assuming empty space)' ) shortest_path = np.stack((source_world, target_world)) geodesic_distance = l2_distance(source_world, target_world) return shortest_path, geodesic_distance
def get_termination(self, task, env): """ Return whether the episode should terminate. Terminate if point goal is reached (distance below threshold) :param task: task instance :param env: environment instance :return: done, info """ done = l2_distance(env.robots[0].get_position()[:2], task.target_pos[:2]) < self.dist_tol success = done return done, success
def get_reward(self, task, env): """ Check if the distance between the robot's end-effector and the goal is below the distance threshold :param task: task instance :param env: environment instance :return: reward """ success = l2_distance(env.robots[0].get_end_effector_position(), task.target_pos) < self.dist_tol reward = self.success_reward if success else 0.0 return reward
def build_trav_graph(self, maps_path, floor, trav_map): """ Build traversibility graph and only take the largest connected component :param maps_path: String with the path to the folder containing the traversability maps :param floor: floor number :param trav_map: traversability map """ graph_file = os.path.join(maps_path, 'floor_trav_{}.p'.format(floor)) if os.path.isfile(graph_file): logging.info("Loading traversable graph") with open(graph_file, 'rb') as pfile: g = pickle.load(pfile) else: logging.info("Building traversable graph") g = nx.Graph() for i in range(self.trav_map_size): for j in range(self.trav_map_size): if trav_map[i, j] == 0: continue g.add_node((i, j)) # 8-connected graph neighbors = [(i - 1, j - 1), (i, j - 1), (i + 1, j - 1), (i - 1, j)] for n in neighbors: if 0 <= n[0] < self.trav_map_size and \ 0 <= n[1] < self.trav_map_size and \ trav_map[n[0], n[1]] > 0: g.add_edge(n, (i, j), weight=l2_distance(n, (i, j))) # only take the largest connected component largest_cc = max(nx.connected_components(g), key=len) g = g.subgraph(largest_cc).copy() with open(graph_file, 'wb') as pfile: pickle.dump(g, pfile, protocol=pickle.HIGHEST_PROTOCOL) self.floor_graph.append(g) # update trav_map accordingly trav_map[:, :] = 0 for node in g.nodes: trav_map[node[0], node[1]] = 255
def reset_initial_and_target_pos(self): """ Reset initial_pos, initial_orn and target_pos through randomization The geodesic distance (or L2 distance if traversable map graph is not built) between initial_pos and target_pos has to be between [self.target_dist_min, self.target_dist_max] """ _, self.initial_pos = self.scene.get_random_point_floor(self.floor_num, self.random_height) max_trials = 100 dist = 0.0 for _ in range(max_trials): _, self.target_pos = self.scene.get_random_point_floor(self.floor_num, self.random_height) if self.scene.build_graph: _, dist = self.get_shortest_path(from_initial_pos=True) else: dist = l2_distance(self.initial_pos, self.target_pos) if self.target_dist_min < dist < self.target_dist_max: break if not (self.target_dist_min < dist < self.target_dist_max): print("WARNING: Failed to sample initial and target positions") self.initial_orn = np.array([0, 0, np.random.uniform(0, np.pi * 2)])
def reset_initial_and_target_pos(self): collision_links = [-1] while -1 in collision_links: # if collision happens, reinitialize floor, pos = self.scene.get_random_point() self.robots[0].set_position(pos=[pos[0], pos[1], pos[2] + 0.1]) self.robots[0].set_orientation(orn=quatToXYZW( euler2quat(0, 0, np.random.uniform(0, np.pi * 2)), 'wxyz')) collision_links = [] for _ in range(self.simulator_loop): self.simulator_step() collision_links += [ item[3] for item in p.getContactPoints( bodyA=self.robots[0].robot_ids[0]) ] collision_links = np.unique(collision_links) self.initial_pos = pos dist = 0.0 while dist < 1.0: # if initial and target positions are < 1 meter away from each other, reinitialize _, self.target_pos = self.scene.get_random_point_floor( floor, self.random_height) dist = l2_distance(self.initial_pos, self.target_pos)
def get_termination(self): self.current_step += 1 done, info = False, {} # goal reached if l2_distance(self.target_pos, self.get_position_of_interest()) < self.dist_tol: # print('goal') done = True info['success'] = True # robot flips over elif self.robots[0].get_position()[2] > 0.1: # print('death') done = True info['success'] = False # time out elif self.current_step >= self.max_step: # print('timeout') done = True info['success'] = False return done, info
def get_potential(self): return l2_distance(self.target_pos, self.get_position_of_interest())
def get_pixel_potential(self): """ :return: L2 distance between middle of image and current """ return l2_distance([self.image_width/2, self.image_height/2], self.get_goal_center())
def is_goal_reached(self): return l2_distance(self.get_position_of_interest(), self.target_pos) < self.dist_tol
def get_l2_potential(self): """ :return: L2 distance to the target position """ return l2_distance(self.target_pos, self.get_position_of_interest())
def is_goal_reached(self): if self.reward_type == 'pixel': return l2_distance([self.image_width/2, self.image_height/2], self.get_position_of_interest()) < self.dist_tol else: return l2_distance(self.get_position_of_interest(), self.target_pos) < self.dist_tol
def load_trav_map(self): self.floor_map = [] self.floor_graph = [] for f in range(len(self.floors)): trav_map = np.array( Image.open( os.path.join(get_model_path(self.model_id), 'floor_trav_{}.png'.format(f)))) obstacle_map = np.array( Image.open( os.path.join(get_model_path(self.model_id), 'floor_{}.png'.format(f)))) if self.trav_map_original_size is None: height, width = trav_map.shape assert height == width, 'trav map is not a square' self.trav_map_original_size = height self.trav_map_size = int(self.trav_map_original_size * self.trav_map_default_resolution / self.trav_map_resolution) trav_map[obstacle_map == 0] = 0 trav_map = cv2.resize(trav_map, (self.trav_map_size, self.trav_map_size)) trav_map = cv2.erode( trav_map, np.ones((self.trav_map_erosion, self.trav_map_erosion))) trav_map[trav_map < 255] = 0 if self.build_graph: graph_file = os.path.join(get_model_path(self.model_id), 'floor_trav_{}.p'.format(f)) if os.path.isfile(graph_file): logging.info("Loading traversable graph") with open(graph_file, 'rb') as pfile: g = pickle.load(pfile) else: logging.info("Building traversable graph") g = nx.Graph() for i in range(self.trav_map_size): for j in range(self.trav_map_size): if trav_map[i, j] > 0: g.add_node((i, j)) # 8-connected graph neighbors = [(i - 1, j - 1), (i, j - 1), (i + 1, j - 1), (i - 1, j)] for n in neighbors: if 0 <= n[0] < self.trav_map_size and 0 <= n[ 1] < self.trav_map_size and trav_map[ n[0], n[1]] > 0: g.add_edge(n, (i, j), weight=l2_distance( n, (i, j))) # only take the largest connected component largest_cc = max(nx.connected_components(g), key=len) g = g.subgraph(largest_cc).copy() with open(graph_file, 'wb') as pfile: pickle.dump(g, pfile, protocol=pickle.HIGHEST_PROTOCOL) self.floor_graph.append(g) # update trav_map accordingly trav_map[:, :] = 0 for node in g.nodes: trav_map[node[0], node[1]] = 255 self.floor_map.append(trav_map)
def get_arm_joint_positions(self, arm_ik_goal): """ Attempt to find arm_joint_positions that satisfies arm_subgoal If failed, return None :param arm_ik_goal: [x, y, z] in the world frame :return: arm joint positions """ ik_start = time() max_limits, min_limits, rest_position, joint_range, joint_damping = \ self.get_ik_parameters() n_attempt = 0 max_attempt = 75 sample_fn = get_sample_fn(self.robot_id, self.arm_joint_ids) base_pose = get_base_values(self.robot_id) state_id = p.saveState() #p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, False) # find collision-free IK solution for arm_subgoal while n_attempt < max_attempt: if self.robot_type == 'Movo': self.robot.tuck() set_joint_positions(self.robot_id, self.arm_joint_ids, sample_fn()) arm_joint_positions = p.calculateInverseKinematics( self.robot_id, self.robot.end_effector_part_index(), targetPosition=arm_ik_goal, # targetOrientation=self.robots[0].get_orientation(), lowerLimits=min_limits, upperLimits=max_limits, jointRanges=joint_range, restPoses=rest_position, jointDamping=joint_damping, solver=p.IK_DLS, maxNumIterations=100) if self.robot_type == 'Fetch': arm_joint_positions = arm_joint_positions[2:10] elif self.robot_type == 'Movo': arm_joint_positions = arm_joint_positions[:8] set_joint_positions(self.robot_id, self.arm_joint_ids, arm_joint_positions) dist = l2_distance(self.robot.get_end_effector_position(), arm_ik_goal) # print('dist', dist) if dist > self.arm_ik_threshold: n_attempt += 1 continue # need to simulator_step to get the latest collision self.simulator_step() # simulator_step will slightly move the robot base and the objects set_base_values_with_z(self.robot_id, base_pose, z=self.initial_height) # self.reset_object_states() # TODO: have a princpled way for stashing and resetting object states # arm should not have any collision if self.robot_type == 'Movo': collision_free = is_collision_free( body_a=self.robot_id, link_a_list=self.arm_joint_ids_all) # ignore linear link else: collision_free = is_collision_free( body_a=self.robot_id, link_a_list=self.arm_joint_ids) if not collision_free: n_attempt += 1 # print('arm has collision') continue # gripper should not have any self-collision collision_free = is_collision_free( body_a=self.robot_id, link_a_list=[self.robot.end_effector_part_index()], body_b=self.robot_id) if not collision_free: n_attempt += 1 print('gripper has collision') continue #self.episode_metrics['arm_ik_time'] += time() - ik_start #p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, True) p.restoreState(state_id) p.removeState(state_id) return arm_joint_positions #p.configureDebugVisualizer(p.COV_ENABLE_RENDERING, True) p.restoreState(state_id) p.removeState(state_id) #self.episode_metrics['arm_ik_time'] += time() - ik_start return None