def worker(idx, house_id, device):
    colormapFile = "../metadata/colormap_coarse.csv"
    api = objrender.RenderAPI(w=args.width, h=args.height, device=device)
    env = Environment(api, house_id, cfg)
    N = 15000
    start = time.time()
    cnt = 0
    env.reset()
    for t in range(N):
        cnt += 1
        env.move_forward(random.random() * 3, random.random() * 3)
        mat = env.render()
        if cnt % 50 == 0:
            env.reset()
    end = time.time()
    print("Worker {}, speed {:.3f} fps".format(idx, N / (end - start)))
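
# --- Illustrative only: a minimal driver sketch for worker(). It assumes the
# --- module-level `args` (width/height), `cfg`, and a `houses` list of house IDs
# --- defined elsewhere in this script; one process per house, with GPUs assigned
# --- round-robin. Not part of the original benchmark.
def run_benchmark(houses, num_gpus=1):
    import multiprocessing as mp
    procs = []
    for idx, house_id in enumerate(houses):
        device = idx % num_gpus  # round-robin over available GPUs
        p = mp.Process(target=worker, args=(idx, house_id, device))
        p.start()
        procs.append(p)
    for p in procs:
        p.join()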
for i in range(env.house.obsMap.shape[0]):
    for j in range(env.house.obsMap.shape[1]):
        if env.house.obsMap[i][j] == 0:
            con.append([i, j])

for i in range(100):
    gx, gy = random.choice(con)
    x, y = env.house.to_coor(gx, gy, True)
    loc.append([x, y])
    print([gx, gy, x, y])

# print(loc)
# print(env.cam.pos)
for i in range(len(loc)):
    env.cam.pos.x = loc[i][0]
    env.cam.pos.y = env.house.robotHei
    env.cam.pos.z = loc[i][1]
    # mat = cv2.cvtColor(env.render_cube_map(), cv2.COLOR_BGR2RGB)
    mat = env.debug_render()
    rgb = env.render(RenderMode.RGB)
    semantic = env.render(RenderMode.SEMANTIC)
    depth_float = gendp()
    print(depth_float)
    # depth = np.array(int(gendp() * 65535 / 3)).astype(np.uint16)
    depth = (depth_float * 65535 / 3).astype(np.uint16)  # convert the float depth to uint16
    print(depth)
    # cv2.imwrite('depth.png', depth)
    cv2.imshow("aaa", mat)
    cv2.waitKey(0)
    cv2.imshow("bbb", depth)
    cv2.waitKey(0)
class House3DRGBD:
    def __init__(self,
                 train_mode=True,
                 area_reward_scale=1,
                 collision_penalty=0.1,
                 step_penalty=0.0005,
                 max_depth=3.0,
                 render_door=False,
                 start_indoor=False,
                 ignore_collision=False,
                 ob_dilation_kernel=5,
                 large_map_size=80):
        self.seed()
        self.configs = get_configs()
        self.configs['large_map_size'] = large_map_size
        self.env = None
        self.train_mode = train_mode
        self.render_door = render_door
        self.ignore_collision = ignore_collision
        self.start_indoor = start_indoor
        self.render_height = self.configs['render_height']
        self.render_width = self.configs['render_width']
        self.img_height = self.configs['output_height']
        self.img_width = self.configs['output_width']
        self.ob_dilation_kernel = ob_dilation_kernel
        self.config = load_config(self.configs['path'],
                                  prefix=self.configs['par_path'])
        self.move_sensitivity = self.configs['move_sensitivity']
        self.rot_sensitivity = self.configs['rot_sensitivity']
        self.train_houses = self.configs['train_houses']
        self.test_houses = self.configs['test_houses']
        if train_mode:
            self.houses_id = self.train_houses
            # print("Number of training houses:", len(self.houses_id))
        else:
            self.houses_id = None  # self.test_houses
        self.depth_threshold = (0, max_depth)
        self.area_reward_scale = area_reward_scale
        self.collision_penalty = collision_penalty
        self.step_penalty = step_penalty
        self.observation_space = [self.img_width, self.img_height, 3]
        self.action_space = [6]

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def reset(self, house_id=None, x=None, y=None, yaw=None):
        if house_id is None:
            house_id = self.np_random.choice(self.houses_id, 1)[0]
        self.hid = house_id
        if self.env is not None:
            del self.api
            del self.env
        self.api = objrender.RenderAPI(self.render_width,
                                       self.render_height,
                                       device=RENDERING_GPU)
        self.env = Environment(self.api,
                               house_id,
                               self.config,
                               GridDet=self.configs['GridDet'],
                               RenderDoor=self.render_door,
                               StartIndoor=self.start_indoor)
        if not self.train_mode:
            self.loc_map = self.env.gen_locmap()
            obs_map = self.env.house.obsMap.T
            self.obs_pos = obs_map == 1
            self.traj = []
            self.traj_actions = []
            self.grid_traj = []
        self.L_min = self.env.house.L_lo
        self.L_max = self.env.house.L_hi
        self.grid_size = self.env.house.grid_det
        grid_num = np.array([self.env.house.n_row[0] + 1,
                             self.env.house.n_row[1] + 1])
        self.grids_mat = np.zeros(tuple(grid_num), dtype=np.uint8)
        self.max_grid_size = np.max(grid_num)
        self.max_seen_area = float(np.prod(grid_num))
        self.env.reset(x=x, y=y, yaw=yaw)
        self.start_pos, self.grid_start_pos = self.get_camera_grid_pos()
        if not self.train_mode:
            print('start pose: ', self.start_pos)
            self.traj.append(self.start_pos.tolist())
            self.grid_traj.append(self.grid_start_pos.tolist())
        rgb, depth, extrinsics = self.get_obs()
        large_loc_map, small_loc_map = self.get_loc_map()
        self.seen_area = self.get_seen_area(rgb, depth, extrinsics,
                                            self.grids_mat)
        self.ep_len = 0
        self.ep_reward = 0
        self.collision_times = 0
        ob = (self.resize_img(rgb), large_loc_map, small_loc_map)
        return ob

    def step(self, action):
        if self.ignore_collision:
            collision_flag = self.motion_primitive_no_check(action)
        else:
            collision_flag = self.motion_primitive(action)
        rgb, depth, extrinsics = self.get_obs()
        large_loc_map, small_loc_map = self.get_loc_map()
        if not self.train_mode:
            current_pos, grid_current_pos = self.get_camera_grid_pos()
            self.traj_actions.append(int(action))
            self.traj.append(current_pos.tolist())
            self.grid_traj.append(grid_current_pos.tolist())
        reward, seen_area, raw_reward = self.cal_reward(
            rgb, depth, extrinsics, collision_flag)
        self.ep_len += 1
        self.ep_reward += reward
        if collision_flag:
            self.collision_times += 1
        info = {
            'reward_so_far': self.ep_reward,
            'steps_so_far': self.ep_len,
            'seen_area': seen_area,
            'collisions': self.collision_times,
            'start_pose': self.start_pos,
            'house_id': self.hid,
            'collision_flag': collision_flag
        }
        info = {**info, **raw_reward}
        done = False
        ob = (self.resize_img(rgb), large_loc_map, small_loc_map)
        return ob, reward, done, info

    def render(self):
        loc_map, small_map = self.get_loc_map()
        if not self.train_mode:
            rad = self.env.house.robotRad / self.env.house.grid_det
            x = int(loc_map.shape[0] / 2)
            y = int(loc_map.shape[1] / 2)
            cv2.circle(loc_map, (x, y), 1, (255, 0, 255), thickness=-1)
            x = int(small_map.shape[0] / 2)
            y = int(small_map.shape[1] / 2)
            cv2.circle(small_map, (x, y), int(rad) * 2, (255, 0, 255),
                       thickness=-1)
        loc_map = cv2.resize(loc_map,
                             (self.render_width, self.render_height),
                             interpolation=cv2.INTER_CUBIC)
        if not self.train_mode:
            x = int(loc_map.shape[0] / 2)
            y = int(loc_map.shape[1] / 2)
            cv2.circle(loc_map, (x, y), 2, (255, 0, 255), thickness=-1)
        self.env.set_render_mode('rgb')
        rgb = self.env.render()
        img = np.concatenate((rgb, loc_map), axis=1)
        img = img[:, :, ::-1]
        img = cv2.resize(img, (img.shape[1] * 3, img.shape[0] * 3),
                         interpolation=cv2.INTER_CUBIC)
        cv2.imshow("nav", img)
        cv2.waitKey(40)

    def resize_img(self, img):
        return cv2.resize(img, (self.img_width, self.img_height),
                          interpolation=cv2.INTER_AREA)

    def motion_primitive(self, action):
        # 0: Forward
        # 1: Turn Left
        # 2: Turn Right
        # 3: Strafe Left
        # 4: Strafe Right
        # 5: Backward
        collision_flag = False
        if action == 0:
            if not self.train_mode:
                print('Action: Forward')
            if not self.env.move_forward(self.move_sensitivity):
                if not self.train_mode:
                    print('Cannot move forward, collision!!!')
                collision_flag = True
        elif action == 1:
            if not self.train_mode:
                print('Action: Turn Left')
            self.env.rotate(-self.rot_sensitivity)
        elif action == 2:
            if not self.train_mode:
                print('Action: Turn Right')
            self.env.rotate(self.rot_sensitivity)
        elif action == 3:
            if not self.train_mode:
                print('Action: Strafe Left')
            if not self.env.move_forward(dist_fwd=0,
                                         dist_hor=-self.move_sensitivity):
                if not self.train_mode:
                    print('Cannot strafe left, collision!!!')
                collision_flag = True
        elif action == 4:
            if not self.train_mode:
                print('Action: Strafe Right')
            if not self.env.move_forward(dist_fwd=0,
                                         dist_hor=self.move_sensitivity):
                if not self.train_mode:
                    print('Cannot strafe right, collision!!!')
                collision_flag = True
        elif action == 5:
            if not self.train_mode:
                print('Action: Backward')
            if not self.env.move_forward(-self.move_sensitivity):
                if not self.train_mode:
                    print('Cannot move backward, collision!!!')
                collision_flag = True
        else:
            raise ValueError('unknown action type: [{0:d}]'.format(action))
        return collision_flag

    def move_forward(self, dist_fwd, dist_hor=0):
        """
        Move with `dist_fwd` distance to the front and `dist_hor` distance to
        the right. Both distances are floats. Ignores collisions!!!
        """
""" pos = self.env.cam.pos pos = pos + self.env.cam.front * dist_fwd pos = pos + self.env.cam.right * dist_hor self.env.cam.pos.x = pos.x self.env.cam.pos.z = pos.z def motion_primitive_no_check(self, action): # motion primitive without collision checking # 0: Forward # 1: Turn Left # 2: Turn Right # 3: Strafe Left # 4: Strafe Right # 5: Backward collision_flag = False if action == 0: self.move_forward(self.move_sensitivity) elif action == 1: self.env.rotate(-self.rot_sensitivity) elif action == 2: self.env.rotate(self.rot_sensitivity) elif action == 3: self.move_forward(dist_fwd=0, dist_hor=-self.move_sensitivity) elif action == 4: self.move_forward(dist_fwd=0, dist_hor=self.move_sensitivity) elif action == 5: self.move_forward(-self.move_sensitivity) else: raise ValueError('unknown action type: [{0:d}]'.format(action)) return collision_flag def get_obs(self): self.env.set_render_mode('rgb') rgb = self.env.render() self.env.set_render_mode('depth') depth = self.env.render() infmask = depth[:, :, 1] depth = depth[:, :, 0] * (infmask == 0) true_depth = depth.astype(np.float32) / 255.0 * 20.0 extrinsics = self.env.cam.getExtrinsicsNumpy() return rgb, true_depth, extrinsics def get_seen_area(self, rgb, depth, extrinsics, out_mat, inv_E=True): points, points_colors = gen_point_cloud( depth, rgb, extrinsics, depth_threshold=self.depth_threshold, inv_E=inv_E) grid_locs = np.floor( (points[:, [0, 2]] - self.L_min) / self.grid_size).astype(int) grids_mat = np.zeros( (self.grids_mat.shape[0], self.grids_mat.shape[1]), dtype=np.uint8) high_filter_idx = points[:, 1] < HEIGHT_THRESHOLD[1] low_filter_idx = points[:, 1] > HEIGHT_THRESHOLD[0] obstacle_idx = np.logical_and(high_filter_idx, low_filter_idx) self.safe_assign(grids_mat, grid_locs[high_filter_idx, 0], grid_locs[high_filter_idx, 1], 2) kernel = np.ones((3, 3), np.uint8) grids_mat = cv2.morphologyEx(grids_mat, cv2.MORPH_CLOSE, kernel) obs_mat = np.zeros((self.grids_mat.shape[0], self.grids_mat.shape[1]), dtype=np.uint8) self.safe_assign(obs_mat, grid_locs[obstacle_idx, 0], grid_locs[obstacle_idx, 1], 1) kernel = np.ones((self.ob_dilation_kernel, self.ob_dilation_kernel), np.uint8) obs_mat = cv2.morphologyEx(obs_mat, cv2.MORPH_CLOSE, kernel) obs_idx = np.where(obs_mat == 1) self.safe_assign(grids_mat, obs_idx[0], obs_idx[1], 1) out_mat[np.where(grids_mat == 2)] = 2 out_mat[np.where(grids_mat == 1)] = 1 seen_area = np.sum(out_mat > 0) return seen_area def cal_reward(self, rgb, depth, extrinsics, collision_flag): filled_grid_num = self.get_seen_area(rgb, depth, extrinsics, self.grids_mat, inv_E=True) area_reward = (filled_grid_num - self.seen_area) reward = area_reward * self.area_reward_scale if collision_flag: reward -= self.collision_penalty reward -= self.step_penalty self.seen_area = filled_grid_num raw_reward = {'area': area_reward, 'collision_flag': collision_flag} return reward, filled_grid_num, raw_reward def get_loc_map(self): top_down_map = self.grids_mat.T.copy() half_size = max(top_down_map.shape[0], top_down_map.shape[1], self.configs['large_map_range']) * 3 ego_map = np.ones( (half_size * 2, half_size * 2, 3), dtype=np.uint8) * 255 loc_map = np.zeros((top_down_map.shape[0], top_down_map.shape[1], 3), dtype=np.uint8) loc_map[top_down_map == 0] = np.array([255, 255, 255]) loc_map[top_down_map == 1] = np.array([0, 0, 255]) loc_map[top_down_map == 2] = np.array([0, 255, 0]) current_pos, grid_current_pos = self.get_camera_grid_pos() x_start = half_size - grid_current_pos[1] y_start = half_size - grid_current_pos[0] x_end = x_start + 
top_down_map.shape[0] y_end = y_start + top_down_map.shape[1] assert x_start >= 0 and y_start >= 0 and \ x_end <= ego_map.shape[0] and y_end <= ego_map.shape[1] ego_map[x_start:x_end, y_start:y_end] = loc_map center = (half_size, half_size) rot_angle = self.constrain_to_pm_pi(90 + current_pos[2]) M = cv2.getRotationMatrix2D(center, rot_angle, 1.0) ego_map = cv2.warpAffine(ego_map, M, (ego_map.shape[1], ego_map.shape[0]), flags=cv2.INTER_AREA, borderMode=cv2.BORDER_CONSTANT, borderValue=(255, 255, 255)) start = half_size - self.configs['small_map_range'] end = half_size + self.configs['small_map_range'] small_ego_map = ego_map[start:end, start:end] start = half_size - self.configs['large_map_range'] end = half_size + self.configs['large_map_range'] assert start >= 0 assert end <= ego_map.shape[0] large_ego_map = ego_map[start:end, start:end] return cv2.resize( large_ego_map, (self.configs['large_map_size'], self.configs['large_map_size']), interpolation=cv2.INTER_AREA), small_ego_map def safe_assign(self, im_map, x_idx, y_idx, value): try: im_map[x_idx, y_idx] = value except IndexError: valid_idx1 = np.logical_and(x_idx >= 0, x_idx < im_map.shape[0]) valid_idx2 = np.logical_and(y_idx >= 0, y_idx < im_map.shape[1]) valid_idx = np.logical_and(valid_idx1, valid_idx2) im_map[x_idx[valid_idx], y_idx[valid_idx]] = value def constrain_to_pm_pi(self, theta): # make sure theta is within [-180, 180) return (theta + 180) % 360 - 180 def get_camera_grid_pos(self): current_pos = np.array([ self.env.cam.pos.x, self.env.cam.pos.z, self.constrain_to_pm_pi(self.env.cam.yaw) ]) grid_pos = np.array( self.env.house.to_grid(current_pos[0], current_pos[1])) return current_pos, grid_pos def truncated_norm(self, mu, sigma, lower_limit, upper_limit, size): if sigma == 0: return mu lower_limit = (lower_limit - mu) / sigma upper_limit = (upper_limit - mu) / sigma r = truncnorm(lower_limit, upper_limit, loc=mu, scale=sigma) return r.rvs(size)
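
# --- Illustrative only: a minimal smoke test for House3DRGBD, assuming the
# --- House3D/SUNCG paths referenced by get_configs()/load_config() are set up.
# --- The helper name and the random-action loop are hypothetical, not part of
# --- the original environment code.
def _example_rollout(num_steps=50):
    env = House3DRGBD(train_mode=True)
    ob = env.reset()
    episode_return = 0.0
    info = {}
    for _ in range(num_steps):
        action = np.random.randint(6)  # six discrete motion primitives
        ob, reward, done, info = env.step(action)
        episode_return += reward
    print('seen area:', info.get('seen_area'), 'return:', episode_return)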
for i in range(env.house.obsMap.shape[0]):
    for j in range(env.house.obsMap.shape[1]):
        if env.house.obsMap[i][j] == 0:
            con.append([i, j])

for i in range(100):
    gx, gy = random.choice(con)
    x, y = env.house.to_coor(gx, gy, True)
    loc.append([x, y])
    print([gx, gy, x, y])

# print(loc)
# print(env.cam.pos)
for i in range(len(loc)):
    env.cam.pos.x = loc[i][0]
    env.cam.pos.y = env.house.robotHei
    env.cam.pos.z = loc[i][1]
    # mat = cv2.cvtColor(env.render_cube_map(), cv2.COLOR_BGR2RGB)
    mat = env.debug_render()
    rgb = env.render(RenderMode.RGB)
    semantic = env.render(RenderMode.SEMANTIC)
    depth = env.render(RenderMode.DEPTH)
    infmask = depth[:, :, 1]
    depth = depth[:, :, 0] * (infmask == 0)
    cv2.imwrite('depth.png', depth)
    cv2.imshow("aaa", mat)
    cv2.waitKey(0)
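
# --- Illustrative only: the depth render above packs an 8-bit depth channel plus
# --- an "infinity" mask; elsewhere in this codebase the channel is converted to
# --- metres with depth / 255.0 * 20.0. A hedged helper for writing a 16-bit depth
# --- PNG instead of the raw 8-bit channel saved above (the 20 m scale is taken
# --- from that conversion, not from separate documentation).
def save_depth_16bit(raw_depth, path='depth_16bit.png'):
    true_depth = raw_depth.astype(np.float32) / 255.0 * 20.0   # metres
    depth_u16 = (true_depth / 20.0 * 65535).astype(np.uint16)  # spread over 16 bits
    cv2.imwrite(path, depth_u16)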
class TrajectoryGenerator():
    def __init__(self,
                 traj_dir,
                 house_id=args.house_id,
                 traj_id=None,
                 load_graph=True):
        self.house_id = house_id
        self.traj_dir = traj_dir
        if house_id is not None:
            self.create_env(args.config_path, house_id, args.render_width,
                            args.render_height)
        # Contains methods for calculating distances, room location, etc.
        self.hp = HouseParse(dataDir=args.data_dir)
        # Loads house graph and generates shortest paths.
        self.utils = House3DUtils(
            self.env,
            rotation_sensitivity=45,
            target_obj_conn_map_dir=False,
            # Changed load_graph method to use pickle directly and Graph(g)
            # initialisation; self.graph.load(path) left the graph empty!
            build_graph=load_graph,
            graph_dir=args.graph_dir)
        self.house = {}
        self.current_room = None
        self.env_coors = None
        self.traj_id = None
        if traj_id is not None:
            self.update_trajectory(traj_id)

    """
    Initialize environment for a given house.
    """

    def create_env(self,
                   config_path,
                   house_id,
                   render_width=args.render_width,
                   render_height=args.render_height):
        api = objrender.RenderAPIThread(w=render_width,
                                        h=render_height,
                                        device=0)
        cfg = load_config(config_path)
        self.env = Environment(api, house_id, cfg)

    """
    Load given trajectory from file.
    """

    def update_trajectory(self, traj_id):
        self.traj_id = traj_id
        load_path = os.path.join(self.traj_dir, self.house_id + '.npy')
        self.env_coors = np.load(load_path)[traj_id]
        # Add look-arounds when entering rooms.
        print('Preprocessing trajectory for room views '
              '(90 degrees left and right)')
        self.house = {}
        self.add_180s_to_trajectory()

    """
    Update the agent's position.
    """

    def update_env(self, new_pos):
        self.env.cam.pos.x = new_pos[0]
        self.env.cam.pos.y = 1.2
        self.env.cam.pos.z = new_pos[2]
        self.env.cam.yaw = new_pos[3]
        self.env.cam.updateDirection()

    """
    Preprocesses the trajectory by adding frames where the agent looks around
    when entering a room.
    """

    def add_180s_to_trajectory(self):
        # Index house rooms first.
        self.build_rooms_and_objects_description()
        new_coors = []
        for i in range(len(self.env_coors)):
            self.update_env(self.env_coors[i])
            new_coors.append(self.env_coors[i])
            # Entered a new room, look around.
            if self.update_current_room(self.env_coors[i]):
                init_yaw = new_coors[-1][3]
                new_coor = new_coors[-1]
                # Look around (left, right) up to 90 degrees in increments of 30.
                yaw_adds = [1, 1, 1, -1, -1, -1, -1, -1, -1, 1, 1, 1]
                for yaw_add in yaw_adds:
                    new_coor = (new_coor[0], new_coor[1], new_coor[2],
                                new_coor[3] + 30 * yaw_add)
                    new_coors.append(new_coor)
        self.env_coors = new_coors

    """
    Render the trajectory step by step.
    """

    def render_and_save_trajectory(self, frame_dir):
        self.env.set_render_mode(RenderMode.RGB)
        s = self.env_coors[0]
        d = self.env_coors[-1]
        assert os.path.exists(
            frame_dir), 'Can\'t save frames, non-existent directory!'
        filename = '%s/%s_%04d.mp4' % (frame_dir, self.house_id, self.traj_id)
        print('Generating', filename)
        video = cv2.VideoWriter(filename, cv2.VideoWriter_fourcc(*'mp4v'),
                                args.fps,
                                (args.render_width, args.render_height))
        for i in range(len(self.env_coors)):
            self.update_env(self.env_coors[i])
            img = np.array(self.env.render(), copy=False)
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
            # Write video frame.
            video.write(img)
        cv2.destroyAllWindows()
        video.release()

    """
    Render the agent's current view in a given mode.
    """
""" def render_frame(self, mode=RenderMode.RGB): self.env.set_render_mode(mode) img = self.env.render() if mode in [RenderMode.SEMANTIC, RenderMode.RGB]: img = np.array(img, copy=False, dtype=np.int32) return img elif mode == RenderMode.DEPTH: img = img[:, :, 0] img = np.array(img, copy=False, dtype=np.float32) return img else: return None """ Returns a set of all the simple room types in the given list, with the following changes: - guest_room -> bedroom - toilet -> bathroom """ @staticmethod def get_room_types(types): room_types = types if 'toilet' in room_types: room_types.remove('toilet') if not 'bathroom' in room_types: room_types.append('bathroom') if 'guest_room' in room_types: room_types.remove('guest_room') if not 'bedroom' in room_types: room_types.append('bedroom') return list(sorted(room_types)) """ Trajectory/question generation requires frame-by-frame semantic processing and establishing object/room attributes. As we have access to the ground truth information, we can just lookup the properties in the dict that this method returns. """ def build_rooms_and_objects_description(self): obj_id_to_color = json.load(open(args.obj_color_path)) room_unique_id = 0 for room in self.utils.rooms: # Add room type to dict room_types = TrajectoryGenerator.get_room_types(room['type']) room_type = '|'.join(room_types) if not room_type in self.house: self.house[room_type] = { 'room_list': [], 'count': 0, 'been_here_count': 0 } self.house[room_type]['count'] += 1 # Prepare property container for room room_unique_id += 1 room_desc = { 'been_here': False, 'room_type': room_type, 'bbox': room['bbox'], 'objects': {}, 'room_id': room_type + str(room_unique_id) } objects_in_room = self.get_object_objects_in_room(room) for obj in objects_in_room: if not obj['coarse_class'] in constants.query_objects: continue # Add object type to dict obj_type = obj['coarse_class'] if not obj_type in room_desc['objects']: room_desc['objects'][obj_type] = { 'obj_list': [], 'count': 0, 'seen_count': 0 } room_desc['objects'][obj_type]['count'] += 1 # Prepare property container for object color = None node = '.0_' + obj['id'][2:] if self.house_id + node in obj_id_to_color: color = obj_id_to_color[self.house_id + node] obj_desc = { 'node': node, 'bbox': obj['bbox'], 'color': color, 'seen': False, 'room_location': room_type, 'obj_type': obj_type, 'room_id': room_type + str(room_unique_id) } room_desc['objects'][obj_type]['obj_list'].append(obj_desc) self.house[room_type]['room_list'].append(room_desc) """ Index all objects in the given room. """ def get_object_objects_in_room(self, room): return [ self.utils.objects['0_' + str(node)] for node in room['nodes'] if '0_' + str(node) in self.utils.objects and self.utils.objects[ '0_' + str(node)]['coarse_class'] in constants.query_objects ] """ Generate a shortest path between random locations in room1 and room2. """ def generate_random_path(self, room1, room2): # Disabled assert in getRandomLocation for ALLOWED_TARGET_ROOM_TYPES c1 = self.env.house.getRandomLocation( room1, return_grid_loc=True, mixedTp=True) + (0, ) c2 = self.env.house.getRandomLocation( room2, return_grid_loc=True, mixedTp=True) + (0, ) print('Generating random path from %s to %s' % (room1, room2), c1, c2) start = time.time() path = self.utils.compute_shortest_path(c1, c2) print(time.time() - start, 'seconds to generate path of length', len(path.nodes)) return path """ Turn grid path into a trajectory inside the house. 
""" def get_graph_to_env_coors(self, path): env_coors = [] for node in path.nodes: to_coor = self.env.house.to_coor(node[0], node[1]) env_coor = (to_coor[0], self.env.house.robotHei, to_coor[1], node[2]) env_coors.append(env_coor) return env_coors """ Returns whether the agent entered a new room. """ def update_current_room(self, agent_pos): agent_new_pos_obj = TrajectoryGenerator.get_agent_pos_obj(agent_pos) if self.current_room == None or \ not (self.hp.isContained(self.current_room, agent_new_pos_obj, axis=0) and \ self.hp.isContained(self.current_room, agent_new_pos_obj, axis=2)): # New room for room_type in self.house: for room in self.house[room_type]['room_list']: if self.hp.isContained(room, agent_new_pos_obj, axis=0) and \ self.hp.isContained(room, agent_new_pos_obj, axis=2): self.current_room = room # print("New room entered:", self.current_room['room_id']) self.current_room['been_here'] = True self.house[self.current_room['room_type']][ 'been_here_count'] += 1 return True return False """ Given a door node, finds at most two rooms which are on either side of the door and adds this information to the door object. """ def find_adjacent_rooms_for_door(self, door_node): door_obj = self.doors[door_node] # One adjacent room is the room which the door belongs to door_obj['adjacent_rooms'] = [door_obj['room_id']] # If the door doesn't belong to the room we're currently in, append the latter to the list # as the other adjacent room if door_obj['room_id'] != self.current_room['room_id']: door_obj['adjacent_rooms'].append(self.current_room['room_id']) return # Looking for the second adjacent room for room_type in self.house: for room_obj in self.house[room_type]['room_list']: if room_obj['room_id'] == door_obj['room_id']: continue if self.hp.isContained(room_obj, door_obj, axis=0) or\ self.hp.isContained(room_obj, door_obj, axis=2): # Found the other adjacent room door_obj['adjacent_rooms'].append(room_obj['room_id']) return """ Given a list of objects in the current view (type->count, approx_depths) and the ground truth information, mark doors that were seen. Unlike match_seen_to_ground_truth(), this method tries to find seen doors in the entire house. (Uses approximation when computing distances, not 100% accurate.) """ def match_seen_to_doors(self, objs_in_frame, agent_pos): if 'door' not in objs_in_frame: return [] agent_pos_obj = TrajectoryGenerator.get_agent_pos_obj(agent_pos) doors_seen = [] count = objs_in_frame['door']['count'] depths = objs_in_frame['door']['depths'] for i in range(count): curr_depth = depths[i] for room_type in self.house: for room in self.house[room_type]['room_list']: if not 'door' in room['objects']: continue door_objs_in_room = room['objects']['door']['obj_list'] for door_obj in door_objs_in_room: # Distance from agent to bbox centre of door coord_bbox = list((np.array(door_obj['bbox']['min']) +\ np.array(door_obj['bbox']['max'])) / 2) bbox_centre = { 'bbox': { 'min': coord_bbox, 'max': coord_bbox } } true_dist1 = self.hp.getClosestDistance( agent_pos_obj, bbox_centre) # Default distance computation true_dist2 = self.hp.getClosestDistance( agent_pos_obj, door_obj) # Check if the door object corresponds to the door seen if isclose(curr_depth, true_dist1, rtol=0.25) or\ isclose(curr_depth, true_dist2, rtol=0.25): doors_seen.append(door_obj) break return doors_seen """ Given a list of objects in the current view (type->count, approx_depths) and the ground truth information, mark objects that were seen. 
    (Uses approximation when computing distances, not 100% accurate.)
    """

    def match_seen_to_ground_truth(self, objs_in_frame, agent_pos):
        if self.current_room is None:
            return []
        agent_pos_obj = TrajectoryGenerator.get_agent_pos_obj(agent_pos)
        obj_nodes_seen = []
        for obj_type in objs_in_frame:
            count = objs_in_frame[obj_type]['count']
            depths = objs_in_frame[obj_type]['depths']
            for i in range(count):
                if not obj_type in self.current_room['objects']:
                    continue
                curr_depth = depths[i]
                for j in range(
                        self.current_room['objects'][obj_type]['count']):
                    obj_entry = self.current_room['objects'][obj_type][
                        'obj_list'][j]
                    # An object is usually visible across multiple frames -
                    # only mark it once.
                    if not obj_entry['seen']:
                        # Get distance between bboxes of agent and object.
                        coord1 = list(
                            (np.array(obj_entry['bbox']['min']) +
                             np.array(obj_entry['bbox']['max'])) / 2)
                        bbox_centre = {'bbox': {'min': coord1, 'max': coord1}}
                        true_dist1 = self.hp.getClosestDistance(
                            agent_pos_obj, bbox_centre)
                        true_dist2 = self.hp.getClosestDistance(
                            agent_pos_obj, obj_entry)
                        # Check if the object in view is in the room.
                        if isclose(curr_depth, true_dist1, rtol=0.25) or \
                                isclose(curr_depth, true_dist2, rtol=0.25):
                            obj_nodes_seen.append(obj_entry)
                            obj_entry['seen'] = True
                            self.current_room['objects'][obj_type][
                                'seen_count'] += 1
                            break
        return obj_nodes_seen

    """
    Generate a trajectory and gather seen rooms and objects. Optionally
    returns objects in all video frames corresponding to the trajectory.
    """

    def generate_trajectory_and_seen_items(self,
                                           frame_dir=None,
                                           compute_seen_doors=False,
                                           return_objects_in_frames=False):
        self.build_rooms_and_objects_description()
        if frame_dir:
            self.render_and_save_trajectory(frame_dir)
        rgb_to_obj = TrajectoryGenerator.get_semantic_to_object_mapping(
            args.csv_path)
        self.current_room = None
        if return_objects_in_frames:
            objects_in_frames = []
        # Parse frames.
        start = time.time()
        for c in range(len(self.env_coors)):
            # Update agent position in the environment.
            self.update_env(self.env_coors[c])
            semantic_img = self.render_frame(mode=RenderMode.SEMANTIC)
            depth_img = self.render_frame(mode=RenderMode.DEPTH)
            # Mark current room as visited.
            self.update_current_room(self.env_coors[c])
            # Get object types and approximate depths from the current frame.
            objs_in_frame = TrajectoryGenerator.get_objects_in_frame(
                semantic_img, rgb_to_obj, depth_img)
            if return_objects_in_frames:
                objects_in_frames.append(list(objs_in_frame.keys()))
            # Mark objects in the ground truth room that correspond to the current view.
            seen_nodes = self.match_seen_to_ground_truth(
                objs_in_frame, self.env_coors[c])
            if compute_seen_doors:
                # Store objects corresponding to all seen doors (in the entire house).
                seen_doors = self.match_seen_to_doors(objs_in_frame,
                                                      self.env_coors[c])
                for door in seen_doors:
                    if door['node'] in self.doors:
                        continue
                    # See which rooms are on both sides of the new door.
                    self.doors[door['node']] = door
                    self.find_adjacent_rooms_for_door(door['node'])
        print(time.time() - start, 'seconds to process',
              str(len(self.env_coors)), 'frames.')
        if return_objects_in_frames:
            return objects_in_frames

    """
    Returns a list of the valid room types in the house.
    """

    def get_all_valid_room_types(self):
        return [
            x for x in self.env.house.all_roomTypes
            if TrajectoryGenerator.valid_room_type(x)
        ]

    """
    Get pairs of nearby objects for a visited room with marked objects.
    """
""" def get_nearby_object_pairs(self, room_desc): assert room_desc['been_here'], 'This room has not been visited!' obj_container = [] for obj_type in room_desc['objects']: cnt_type = 0 for obj_entry in room_desc['objects'][obj_type]['obj_list']: # Make sure the object was seen on the trajectory if not obj_entry['seen']: continue cnt_type += 1 obj = ItemInfo(name=obj_type + str(cnt_type), meta=obj_entry) obj_container.append(obj) if len(obj_container) > 0: return self.hp.getNearbyPairs(obj_container, hthreshold=args.h_threshold, vthreshold=args.v_threshold) return {'on': [], 'next_to': []} """ Returns a dict with keys ['on', 'next_to'] and values as list of tuples (obj1, obj2, dist) showing spatial relationships between objects. """ def get_all_nearby_object_pairs(self): all_pairs = {'on': [], 'next_to': []} for room_type in self.house: for room_obj in self.house[room_type]['room_list']: # Only look at visited rooms if room_obj['been_here']: pairs = self.get_nearby_object_pairs(room_obj) for rel in ['on', 'next_to']: all_pairs[rel] += pairs[rel] return all_pairs """ Get the list of all objects (either on the trajectory or in the entire house). """ def get_all_objects(self, include_unseen_objects=False, include_objects_in_all_rooms=False): obj_list = [] for room_type in self.house: for room in self.house[room_type]['room_list']: if not room['been_here'] and not include_objects_in_all_rooms: continue for obj_type in room['objects']: for obj in room['objects'][obj_type]['obj_list']: if obj['seen'] or include_unseen_objects: obj_list.append(obj) return obj_list """ Get the list of all rooms (either on the trajectory or in the entire house). Does not include object list for rooms. """ def get_all_rooms(self, include_unseen_rooms=False): room_list = [] for room_type in self.house: for room in self.house[room_type]['room_list']: if room['been_here'] or include_unseen_rooms: # Don't include object list room_list.append({ 'been_here': True, 'room_type': room_type, 'bbox': room['bbox'], 'room_id': room['room_id'] }) return room_list """ Return agent's current position inside an object with 'bbox' attribute. Useful for calling HouseParse methods. """ @staticmethod def get_agent_pos_obj(agent_pos): agent_new_pos_obj = { 'bbox': { 'min': agent_pos[0:3], 'max': agent_pos[0:3], }, } return agent_new_pos_obj """ Given a depth map and an image with numbered disjoint regions corresponding to a single object type, return an __approximate__ depth for each one of them. """ @staticmethod def get_approx_depths_for_object_type(depth_img, labeled_objs_img, num_objs): approx_depths = [] for i in range(num_objs): first_idx = next(idx for idx, val in np.ndenumerate(labeled_objs_img) if val == i + 1) approx_depths.append(depth_img[first_idx] / 255.0 * 20.0) return approx_depths """ Extract objects from a frame. object_type -> (num_objects, object_depths) """ @staticmethod def get_objects_in_frame(semantic_img, rgb_to_obj, depth_img): label_img = TrajectoryGenerator.rgb_to_int_image(semantic_img) labels = np.unique(label_img) objs_in_frame = {} # Process information about each unique type of object in the current frame only_curr_obj_img = np.zeros(shape=label_img.shape, dtype=np.int32) for i in range(len(labels)): # "Paint" on a background only the objects of the current type only_curr_obj_img[:, :] = 0 only_curr_obj_img[np.where(label_img == labels[i])] = 1 # Find number of occurrences of object in frame (might be misleading, occlusions or # several objects overlapping e.g. 
            s = [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
            num_objs = label(only_curr_obj_img, output=only_curr_obj_img)
            # Find semantic color of object and convert it to object type.
            first_idx = next(idx
                             for idx, val in np.ndenumerate(only_curr_obj_img)
                             if val == 1)
            rgb = (semantic_img[first_idx[0], first_idx[1], 0],
                   semantic_img[first_idx[0], first_idx[1], 1],
                   semantic_img[first_idx[0], first_idx[1], 2])
            obj_name = rgb_to_obj[rgb]
            # Check if we want to ask questions about the object type.
            if not obj_name in constants.query_objects:
                continue
            objs_in_frame[obj_name] = {}
            # Get number of objects of this type in the frame.
            objs_in_frame[obj_name]['count'] = num_objs
            # Get approximate depths.
            depths = TrajectoryGenerator.get_approx_depths_for_object_type(
                depth_img, only_curr_obj_img, num_objs)
            objs_in_frame[obj_name]['depths'] = depths
        return objs_in_frame

    """
    Open CSV category file to map semantic frames to sets of objects.
    """

    @staticmethod
    def get_semantic_to_object_mapping(path):
        f = open(path, newline="")
        reader = csv.DictReader(f)
        all_objects_dict = {}
        for line in reader:
            all_objects_dict[(int(line['r']), int(line['g']),
                              int(line['b']))] = line['name']
        return all_objects_dict

    """
    Map RGB values in an image to integers: (r,g,b) -> (256^2 * r + 256 * g + b).
    """

    @staticmethod
    def rgb_to_int_image(img):
        out = np.zeros(shape=(img.shape[0], img.shape[1]), dtype=np.int32)
        out = (img[:, :, 0] << 16) | (img[:, :, 1] << 8) | (img[:, :, 2])
        return out

    """
    Blacklist some room types.
    """

    @staticmethod
    def valid_room_type(room_type):
        return len(room_type) > 0 and \
            all([not tp.lower() in constants.exclude_rooms for tp in room_type])
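
# --- Illustrative only: a hypothetical end-to-end use of TrajectoryGenerator.
# --- The helper name and the `traj_dir` argument are assumptions; it expects
# --- `args` (house_id, graph_dir, csv_path, etc.) to have been parsed as in the
# --- rest of this script and a saved <house_id>.npy trajectory file to exist.
def _example_trajectory_run(traj_dir):
    gen = TrajectoryGenerator(traj_dir, house_id=args.house_id,
                              traj_id=0, load_graph=True)
    # Walk the trajectory, marking visited rooms and seen objects.
    gen.generate_trajectory_and_seen_items(frame_dir=None)
    print('Rooms visited:', [r['room_id'] for r in gen.get_all_rooms()])
    print('Objects seen:', [o['obj_type'] for o in gen.get_all_objects()])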
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_file',
                        default='../path_data/cleaned_human_demo_data.json',
                        type=str)
    parser.add_argument('--demo_id', default=0, type=int)
    parser.add_argument('--width', type=int, default=600)
    parser.add_argument('--height', type=int, default=450)
    parser.add_argument('--filter_height', type=bool, default=True)
    args = parser.parse_args()

    with open(args.data_file, 'r') as f:
        data = json.load(f)
    demo = data[args.demo_id]
    print('Total time steps:', len(demo['loc']))
    locs = np.array(demo['loc'][1:])
    ques = demo['question']
    answer = demo['answer']
    text = 'Q: {0:s} A: {1:s}'.format(ques, answer)
    text_height = 60

    cfg = load_config('config.json')
    api = objrender.RenderAPI(w=args.width, h=args.height, device=0)
    env = Environment(api, demo['house_id'], cfg)

    L_min = env.house.L_min_coor
    L_max = env.house.L_max_coor
    L_min = np.array([[env.house.L_lo[0], L_min[1], env.house.L_lo[1]]])
    L_max = np.array([[env.house.L_hi[0], L_max[1], env.house.L_hi[1]]])
    grid_size = env.house.grid_det
    n_row = env.house.n_row
    grid_num = np.array([
        n_row[0] + 1,
        int((L_max[0][1] - L_min[0][1]) / (grid_size + 1e-8)) + 1,
        n_row[1] + 1
    ])
    print('Grid size:', grid_size)
    print('Number of grid in [x, y, z]:', grid_num)

    all_grids = np.zeros(tuple(grid_num), dtype=bool)
    grid_colors = np.zeros(tuple(grid_num) + (3, ), dtype=np.uint8)
    loc_map = env.gen_locmap()
    obs_map = env.house.obsMap.T
    obs_pos = obs_map == 1

    for t in tqdm(range(len(locs))):
        env.reset(x=locs[t][0], y=locs[t][2], yaw=locs[t][3])
        depth = env.render(RenderMode.DEPTH)
        rgb = env.render(RenderMode.RGB)
        semantic = env.render(RenderMode.SEMANTIC)
        infmask = depth[:, :, 1]
        depth = depth[:, :, 0] * (infmask == 0)
        true_depth = depth.astype(np.float32) / 255.0 * 20.0
        extrinsics = env.cam.getExtrinsicsNumpy()
        points, points_colors = gen_point_cloud(true_depth, rgb, extrinsics)
        grid_locs = np.floor((points - L_min) / grid_size).astype(int)
        all_grids[grid_locs[:, 0], grid_locs[:, 1], grid_locs[:, 2]] = True
        grid_colors[grid_locs[:, 0], grid_locs[:, 1],
                    grid_locs[:, 2]] = points_colors

        depth = np.stack([depth] * 3, axis=2)
        loc_map[grid_locs[:, 2], grid_locs[:, 0], :] = np.array(
            [250, 120, 120])
        loc_map[obs_pos] = 0
        rad = env.house.robotRad / env.house.grid_det
        x, y = env.cam.pos.x, env.cam.pos.z
        x, y = env.house.to_grid(x, y)
        loc_map_cp = loc_map.copy()
        cv2.circle(loc_map_cp, (x, y), int(rad), (0, 0, 255), thickness=-1)
        loc_map_resized = cv2.resize(loc_map_cp, env.resolution)

        concat1 = np.concatenate((rgb, semantic), axis=1)
        concat2 = np.concatenate((depth, loc_map_resized), axis=1)
        ret = np.concatenate((concat1, concat2), axis=0)
        ret = ret[:, :, ::-1]
        pad_text = np.ones((text_height, ret.shape[1], ret.shape[2]),
                           dtype=np.uint8)
        mat_w_text = np.concatenate((np.copy(ret), pad_text), axis=0)
        cv2.putText(mat_w_text, text, (10, mat_w_text.shape[0] - 10), FONT,
                    1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.imshow("human_demo", mat_w_text)
        cv2.waitKey(20)

    valid_grids = np.argwhere(all_grids)
    valid_grid_center = valid_grids * grid_size + L_min
    valid_grid_color = grid_colors[valid_grids[:, 0], valid_grids[:, 1],
                                   valid_grids[:, 2]]
    if args.filter_height:
        height_lim = [-0.1, 2.3]
        valid_idx = np.logical_and(valid_grid_center[:, 1] >= height_lim[0],
                                   valid_grid_center[:, 1] <= height_lim[1])
        valid_grid_center = valid_grid_center[valid_idx]
        valid_grid_color = valid_grid_color[valid_idx]
        valid_grids = valid_grids[valid_idx]

    loc_map = env.gen_locmap()
    obs_map = env.house.obsMap.T
    loc_map[valid_grids[:, 2], valid_grids[:, 0], :] = np.array(
        [250, 120, 120])
    loc_map[obs_map == 1] = 0
    loc_map = cv2.resize(loc_map, env.resolution)
    cv2.imwrite('seen_map.png', loc_map)

    if USE_OPEN3D:
        pcd = PointCloud()
        pcd.points = Vector3dVector(valid_grid_center)
        pcd.colors = Vector3dVector(valid_grid_color / 255.0)
        coord = create_mesh_coordinate_frame(3, [40, 0, 35])
        draw_geometries([pcd, coord])
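
# --- Illustrative only: the visualisation above relies on USE_OPEN3D and the
# --- legacy (pre-0.8) top-level Open3D names (PointCloud, Vector3dVector,
# --- create_mesh_coordinate_frame, draw_geometries). An import guard like the
# --- following is one way those names could be provided; this is an assumption
# --- about the module header, which is not shown in this excerpt.
try:
    from open3d import (PointCloud, Vector3dVector,
                        create_mesh_coordinate_frame, draw_geometries)
    USE_OPEN3D = True
except ImportError:
    USE_OPEN3D = False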