def get_top_down_image(self, env_id, set_idx, seg_idx): """ To be called externally to retrieve a top-down environment image oriented with the start of the requested segment :param env_id: environment id :param set_idx: instruction set number :param seg_idx: segment index :return: """ # TODO: Revise the bazillion versions of poses - get rid of this specific one path = load_path(env_id) env_image = load_env_img(env_id, self.map_w, self.map_h) path_img = cf_to_img(path, [env_image.shape[0], env_image.shape[1]]) plot_path_on_img(env_image, path_img) seg = self.all_instr[env_id][set_idx]["instructions"][seg_idx] start_idx = seg["start_idx"] start_pt, dir_yaw = get_start_pt_and_yaw(path, start_idx, self.map_w, self.map_h, self.yaw_rand_range) if start_pt is None: return None affine = get_affine_matrix(start_pt, dir_yaw) seg_img_t = self.gen_top_down_image(env_image, affine) #seg_img_t = seg_img_t.permute(0, 1, 3, 2) # A 2D pose is specified as [pos_x, pos_y, yaw] # A 3D pose would be [pos_x, pos_y, pos_z, r_x, r_y, r_z, r_w] img_pose_2d = {"pos": start_pt, "yaw": dir_yaw} img_pose_2d_t = torch.FloatTensor([start_pt[0], start_pt[1], dir_yaw]).unsqueeze(0) return seg_img_t, img_pose_2d_t
def gen_top_down_labels(path, affine, img_w, img_h, map_w, map_h, incl_path=True, incl_endp=True): seg_labels = np.zeros([img_w, img_h, 2]).astype(float) path_in_img = cf_to_img(path, np.array([map_w, map_h])) gauss_sigma = map_w / 96 seg_labels[:, :, 0] = plot_path_on_img(seg_labels[:,:,0], path_in_img) if len(path_in_img) > 1: seg_labels[:,:,1] = plot_dot_on_img(seg_labels[:,:,1], path_in_img[-1], gauss_sigma) seg_labels_rot = apply_affine(seg_labels, affine, img_w, img_h) seg_labels_rot[:, :, 0] = gaussian_filter(seg_labels_rot[:, :, 0], gauss_sigma) seg_labels_rot[:, :, 1] = gaussian_filter(seg_labels_rot[:, :, 1], gauss_sigma) # Standardize both channels separately (each has mean zero, unit variance) seg_labels_path = standardize_2d_prob_dist(seg_labels_rot[:, :, 0:1]) seg_labels_endpt = standardize_2d_prob_dist(seg_labels_rot[:, :, 1:2]) if DEBUG: cv2.imshow("l_traj", seg_labels_path[0, :, :]) cv2.imshow("l_endpt", seg_labels_endpt[0, :, :]) cv2.waitKey(0) if incl_path and not incl_endp: seg_labels_rot = seg_labels_path elif incl_endp and not incl_path: seg_labels_rot = seg_labels_endpt else: seg_labels_rot = np.concatenate((seg_labels_path, seg_labels_endpt), axis=0) seg_labels_t = torch.from_numpy(seg_labels_rot).unsqueeze(0).float() return seg_labels_t
def get_top_down_ground_truth_static_ego(env_id, start_idx, img_w, img_h, map_w, map_h): """ Returns the ground-truth label oriented in the global map frame :param env_id: :param start_idx: :param img_w: :param img_h: :param map_w: :param map_h: :return: """ path = load_path(env_id) #instruction_segments = [self.all_instr[env_id][set_idx]["instructions"][seg_idx]] start_pt, dir_yaw = tdd.get_start_pt_and_yaw(path, start_idx, map_w, map_h, 0) affine = tdd.get_affine_matrix(start_pt, dir_yaw, img_w, img_h) seg_labels = np.zeros([img_w, img_h, 2]).astype(float) path_in_img = cf_to_img(path, np.array([map_w, map_h])) #gauss_sigma = map_w / 96 gauss_sigma = map_w / 32 seg_labels[:, :, 0] = tdd.plot_path_on_img(seg_labels[:, :, 0], path_in_img) if len(path_in_img) > 1: seg_labels[:, :, 1] = tdd.plot_dot_on_img(seg_labels[:, :, 1], path_in_img[-1], gauss_sigma) seg_labels_rot = tdd.apply_affine(seg_labels, affine, img_w, img_h) seg_labels_rot[:, :, 0] = gaussian_filter(seg_labels_rot[:, :, 0], gauss_sigma) seg_labels_rot[:, :, 1] = gaussian_filter(seg_labels_rot[:, :, 1], gauss_sigma) DEBUG = True if DEBUG: cv2.imshow("l_traj", seg_labels_rot[:, :, 0]) cv2.imshow("l_endpt", seg_labels_rot[:, :, 1]) cv2.waitKey(0) # Standardize both channels separately (each has mean zero, unit variance) seg_labels_path = standardize_2d_prob_dist(seg_labels_rot[:, :, 0:1]) seg_labels_endpt = standardize_2d_prob_dist(seg_labels_rot[:, :, 1:2]) seg_labels_rot = np.concatenate((seg_labels_path, seg_labels_endpt), axis=0) seg_labels_t = torch.from_numpy(seg_labels_rot).unsqueeze(0).float() return seg_labels_t
def get_start_pt_and_yaw(path, start_idx, map_w, map_h, yaw_rand_range): path_img = cf_to_img(path, np.array([map_w, map_h])) if start_idx > len(path) - 2: return None, None start_pt = path_img[start_idx] # Due to the way the data is collected, turns result in multiple subsequent points at the same location, # which messes up the start orientation. That's why we search for the next point that isn't the same as current pt next_idx = start_idx + 1 next_pt = path_img[next_idx] while (next_pt == start_pt).all() and next_idx < len(path) - 1: next_idx += 1 next_pt = path_img[next_idx] dir_vec = next_pt - start_pt dir_yaw = vec_to_yaw(dir_vec) - np.pi / 2 if yaw_rand_range > 0: dir_yaw_offset = random.gauss(0, yaw_rand_range) #dir_yaw_offset = random.uniform(-yaw_rand_range, yaw_rand_range) dir_yaw = dir_yaw + dir_yaw_offset return start_pt, dir_yaw
def get_top_down_ground_truth_dynamic_global(env_id, start_idx, end_idx, drone_pos_as, img_w, img_h, map_w, map_h): """ Returns the ground-truth label oriented in the global map frame :param env_id: :param start_idx: :param img_w: :param img_h: :param map_w: :param map_h: :return: """ PROFILE = False prof = SimpleProfiler(False, PROFILE) path = load_path(env_id, anno=True) #print(len(path), start_idx, end_idx) path = path[start_idx:end_idx] #instruction_segments = [self.all_instr[env_id][set_idx]["instructions"][seg_idx]] prof.tick("load_path") units = UnrealUnits(1.0) drone_pos_cf = units.pos3d_from_as(drone_pos_as) #print("Dynamic ground truth for ", env_id, start_idx, end_idx) gt_dynamic = get_dynamic_ground_truth_v2(path, drone_pos_cf[:2]) #Presenter().plot_path(env_id, [path[start_idx:end_idx], gt_dynamic]) prof.tick("gen_gt_path") seg_labels = np.zeros([img_w, img_h, 2]).astype(float) path_in_img = cf_to_img(gt_dynamic, np.array([map_w, map_h])) gauss_sigma = map_w / 96 seg_labels[:, :, 0] = tdd.plot_path_on_img(seg_labels[:, :, 0], path_in_img) if len(path_in_img) > 1: seg_labels[:, :, 1] = tdd.plot_dot_on_img(seg_labels[:, :, 1], path_in_img[-1], gauss_sigma) prof.tick("plot_path") seg_labels[:, :, 0] = gaussian_filter(seg_labels[:, :, 0], gauss_sigma) seg_labels[:, :, 1] = gaussian_filter(seg_labels[:, :, 1], gauss_sigma) # Standardize both channels separately (each has mean zero, unit variance) seg_labels_path = standardize_2d_prob_dist(seg_labels[:, :, 0:1]) seg_labels_endpt = standardize_2d_prob_dist(seg_labels[:, :, 1:2]) prof.tick("process_img") DEBUG = False if DEBUG: gt_path_in_img = cf_to_img(path, np.asarray([map_w, map_h])) dbg_labels_gt = np.zeros([img_w, img_h, 1]) dbg_labels_gt[:, :, 0] = tdd.plot_path_on_img(dbg_labels_gt[:, :, 0], gt_path_in_img) Presenter().show_image(dbg_labels_gt, "dbg", torch=False, waitkey=10, scale=4) Presenter().show_image(torch.from_numpy(seg_labels_path), "l_path", torch=True, waitkey=10, scale=4) Presenter().show_image(torch.from_numpy(seg_labels_endpt), "l_endp", torch=True, waitkey=100, scale=4) seg_labels = np.concatenate((seg_labels_path, seg_labels_endpt), axis=0) seg_labels_t = torch.from_numpy(seg_labels).unsqueeze(0).float() prof.tick("prep_out") prof.print_stats() return seg_labels_t
def get_item(self, env_id, set_idx, seg_idx): path = load_path(env_id) env_image = load_env_img(env_id, self.map_w, self.map_h) self.latest_img_dbg = env_image data = { "images": [], "instr": [], "traj_labels": [], "affines_g_to_s": [], "lm_pos": [], "lm_indices": [], "lm_mentioned": [], "lm_visible": [], "set_idx": [], "seg_idx": [], "env_id": [] } if self.include_instr_negatives: data["neg_instr"] = [] # Somehow load the instruction with the start and end indices for each of the N segments if self.seg_level: instruction_segments = [self.all_instr[env_id][set_idx]["instructions"][seg_idx]] else: instruction_segments = self.all_instr[env_id][0]["instructions"] for seg_idx, seg in enumerate(instruction_segments): start_idx = seg["start_idx"] end_idx = seg["end_idx"] instruction = seg["instruction"] start_pt, dir_yaw = get_start_pt_and_yaw(path, start_idx, self.map_w, self.map_h, self.yaw_rand_range) if start_pt is None: continue affine = get_affine_matrix(start_pt, dir_yaw, self.img_w, self.img_h) if DEBUG: env_image = self.latest_img_dbg print("Start Pt: ", start_pt) print("Start Yaw: ", dir_yaw) path_img = cf_to_img(path, [env_image.shape[0], env_image.shape[1]]) seg_path = path_img[start_idx:end_idx] env_image = env_image.copy() plot_path_on_img(env_image, seg_path) seg_img_t = gen_top_down_image(env_image, affine, self.img_w, self.img_h, self.map_w, self.map_h) seg_labels_t = gen_top_down_labels(path[start_idx:end_idx], affine, self.img_w, self.img_h, self.map_w, self.map_h, self.incl_path, self.incl_endpoint) instruction_t = self.gen_instruction(instruction) aux_label = self.gen_lm_aux_labels(env_id, instruction, affine) if DEBUG: cv2.waitKey(0) if self.include_instr_negatives: neg_instruction_t = self.gen_neg_instructions(env_id, seg_idx) data["neg_instr"].append(neg_instruction_t) data["images"].append(seg_img_t) data["instr"].append(instruction_t) data["traj_labels"].append(seg_labels_t) data["affines_g_to_s"].append(affine) data["env_id"].append(env_id) data["set_idx"].append(set_idx) data["seg_idx"].append(seg_idx) data = dictlist_append(data, aux_label) return data
def plot_paths(self, segment_dataset, segment_path=None, file=None, interactive=False, bg=True, texts=[], entire_trajectory=False): if interactive: plt.ion() else: plt.ioff() if len(segment_dataset) == 0: print("Empty segment. Not plotting!") return path_key = "path" if entire_trajectory else "seg_path" env_id = segment_dataset[0]["metadata"]["env_id"] if segment_path is None: segment_path = segment_dataset[0]["metadata"][path_key] config_size = UnrealUnits().get_config_size() y_targets, x_targets = list(zip(*cf_to_img(segment_path, [512, 512]))) y_targets = np.asarray(y_targets) * config_size[1] / 512 x_targets = np.asarray(x_targets) * config_size[0] / 512 y_targets = config_size[1] - y_targets #x_targets = CONFIG_SIZE[1] - x_targets # Note that x and y are swapped #x_targets, y_targets = list(zip(*segment_path)) if entire_trajectory: instructions = [ segment_dataset[i]["instruction"] for i in range(len(segment_dataset)) ] unique_instructions = [instructions[0]] for instruction in instructions: if instruction != unique_instructions[-1]: unique_instructions.append(instruction) instruction = "; ".join(unique_instructions) else: instruction = segment_dataset[0]["instruction"] if bg: try: img = load_env_img(env_id) plt.imshow(img, extent=(0, config_size[0], 0, config_size[1])) except Exception as e: print("Error in plotting paths!") print(e) #y_targets = CONFIG_SIZE[1] - y_targets plt.plot(x_targets, y_targets, "r") plt.plot(x_targets[-1], y_targets[-1], "ro") # Plot segment endpoints #for segment in segment_dataset: # end = segment.metadata["seg_path"][-1] # end_x = end[0] # end_y = CONFIG_SIZE[1] - end[1] # plt.plot(end_y, end_x, "ro") x_actual = [] y_actual = [] for sample in segment_dataset: x_actual.append(sample["state"].state[0]) y_actual.append(sample["state"].state[1]) x_actual = np.asarray(x_actual) y_actual = np.asarray(y_actual) """if len(segment_dataset) > 0: instruction, drone_states, actions, rewards, finished = zip(*segment_dataset) drone_states = np.asarray(drone_states) x_actual = drone_states[:, 0] y_actual = drone_states[:, 1]""" plt.plot(x_actual, y_actual, "b") plt.plot(x_actual[-1], y_actual[-1], "bo") plt.axis([0, config_size[0], 0, config_size[1]]) instruction_split = self.split_lines(instruction, maxchars=40) title = "\n".join(instruction_split) plt.title("env: " + str(env_id) + " - " + title) x = 10 y = 5 for text in texts: if not DONT_DRAW_TEXT: plt.text(x, y, text) y += 40 y += len(instruction_split) * 40 + 40 for line in instruction_split: if not DONT_DRAW_TEXT: plt.text(x, y, line) y -= 40 if interactive: plt.show() plt.pause(0.0001)