def get_top_down_image_env(self, env_id, egocentric=False):
    """
    Load the top-down image of an environment and return it as a standardized float tensor.

    The image is loaded at map resolution (self.map_w x self.map_h). If the output resolution
    (self.img_w x self.img_h) is different, the loaded image is copied into the top-left corner
    of a zero-filled array of the output size before standardization.
    :param env_id: environment id
    :param egocentric: not used by this function
    :return: float tensor of the standardized image with an extra leading dimension of size 1
    """
    # NOTE(review): the path is loaded but never used below (only a removed debug plot needed it)
    path = load_path(env_id)
    loaded_img = load_env_img(env_id, self.map_w, self.map_h)

    resolution_matches = self.map_w == self.img_w and self.map_h == self.img_h
    if resolution_matches:
        env_image = loaded_img
    else:
        # We need to return a bigger image resolution than we loaded: zero-pad
        env_image = np.zeros([self.img_h, self.img_w, loaded_img.shape[2]])
        env_image[0:self.map_h, 0:self.map_w, :] = loaded_img

    standardized = standardize_image(env_image)
    return torch.from_numpy(standardized).unsqueeze(0).float()
def analyze_instruction_set(name, iset, corpus, merge_len):
    """
    Print summary statistics for the segments of an instruction set with a given merge length.

    Prints the dataset name followed by an "&"-separated row containing: number of environments,
    number of matching segments, average instruction length in tokens and average demonstration
    path length. If no segment matches merge_len, both averages are printed as 0.00 (previously
    this raised ZeroDivisionError).
    :param name: dataset name, only used in the printed header
    :param iset: dict mapping env id to a list of instruction sets; only the first set is analyzed
    :param corpus: corpus passed to get_word_to_token_map to build the word-to-token mapping
    :param merge_len: only segments whose "merge_len" equals this value are counted
    :return: None
    """
    token_lengths = []
    demo_lengths = []
    _, word2token = get_word_to_token_map(corpus)

    for env_id, instr_sets in iset.items():
        matching = [
            seg for seg in instr_sets[0]["instructions"]
            if seg["merge_len"] == merge_len
        ]
        # Only pay for loading the path if this environment contributes at least one segment
        if not matching:
            continue
        full_path = load_path(env_id)
        for seg in matching:
            tokens = tokenize_instruction(seg['instruction'], word2token)
            seg_path = full_path[seg["start_idx"]:seg["end_idx"]]
            demo_lengths.append(path_length(seg_path))
            token_lengths.append(len(tokens))

    num_segments = len(token_lengths)
    if num_segments > 0:
        avg_tok_len = sum(token_lengths) / num_segments
        # NOTE(review): 4.7 / 1000 is presumably a unit conversion for path length - confirm
        avg_pth_len = sum(demo_lengths) * 4.7 / (num_segments * 1000)
    else:
        avg_tok_len = 0.0
        avg_pth_len = 0.0

    print("Dataset: ", name)
    print("   {} & {} & {:.2f} & {:.2f}".format(len(iset), num_segments,
                                                avg_tok_len, avg_pth_len))
def generate_random_wrong_path(env_id, start_idx, end_idx):
    """
    Generate a random replacement trajectory for the path segment [start_idx, end_idx) of an environment.

    The curve starts at the first point of the segment, with the landmark closest to that point
    used as the starting landmark. try_make_curve is retried until it returns a result.
    Side effect: overwrites the module globals NUM_LANDMARKS_VISISTED and DRONE_EDGE_CLEARANCE.
    :param env_id: environment id
    :param start_idx: index in the environment path where the segment starts
    :param end_idx: index in the environment path where the segment ends (exclusive)
    :return: the generated positions, cut to at most (end_idx - start_idx) points, passed through convert_path
    """
    global NUM_LANDMARKS_VISISTED, DRONE_EDGE_CLEARANCE

    env_config = load_env_config(env_id)
    segment = load_path(env_id)[start_idx:end_idx]
    start_pos = segment[0]

    # Find the landmark that is closest to the start of the segment
    lm_xz = np.asarray(list(zip(env_config["xPos"], env_config["zPos"])))
    lm_dists = np.asarray([np.linalg.norm(start_pos - lm) for lm in lm_xz])
    start_landmark = env_config["landmarkName"][np.argmin(lm_dists)]

    # For segment-level, we're never (if ever) gonna need more than 3 landmarks
    NUM_LANDMARKS_VISISTED = 3
    DRONE_EDGE_CLEARANCE = 0

    attempt = 0
    curve = None
    while curve is None:
        print(f"Attempt: {attempt}")
        attempt += 1
        curve = try_make_curve(env_config, start_pos, start_landmark)

    pos_array, _last_pos, _last_landmark = curve

    # Return a trajectory of the same length as the one which is being replaced
    segment_len = end_idx - start_idx
    return convert_path(pos_array[:segment_len])
def get_top_down_image(self, env_id, set_idx, seg_idx):
    """
    To be called externally to retrieve a top-down environment image (with the full path drawn on it)
    transformed according to the start pose of the requested segment.
    :param env_id: environment id
    :param set_idx: instruction set number
    :param seg_idx: segment index
    :return: None if no start point could be obtained for the segment, otherwise a tuple
        (segment image, 2D pose tensor) where the pose tensor holds [pos_x, pos_y, yaw] with a
        leading dimension of size 1
    """
    # TODO: Revise the bazillion versions of poses - get rid of this specific one
    path = load_path(env_id)
    env_image = load_env_img(env_id, self.map_w, self.map_h)

    # Draw the entire path on the environment image
    img_size = [env_image.shape[0], env_image.shape[1]]
    plot_path_on_img(env_image, cf_to_img(path, img_size))

    segment = self.all_instr[env_id][set_idx]["instructions"][seg_idx]
    start_pt, dir_yaw = get_start_pt_and_yaw(
        path, segment["start_idx"], self.map_w, self.map_h, self.yaw_rand_range)
    if start_pt is None:
        return None

    affine = get_affine_matrix(start_pt, dir_yaw)
    seg_img_t = self.gen_top_down_image(env_image, affine)

    # A 2D pose is specified as [pos_x, pos_y, yaw]
    # A 3D pose would be [pos_x, pos_y, pos_z, r_x, r_y, r_z, r_w]
    pose_values = [start_pt[0], start_pt[1], dir_yaw]
    img_pose_2d_t = torch.FloatTensor(pose_values).unsqueeze(0)
    return seg_img_t, img_pose_2d_t
def faux_dataset_random_landmark(eval_envs):
    """
    Build a faux dataset with one three-sample segment per environment.

    For every environment a random landmark is chosen and a point is sampled on the circle of that
    landmark's radius, at an angle determined by the "side" entry of the environment template.
    The three samples are built from the start position, the start heading and the sampled point.
    :param eval_envs: iterable of environment ids
    :return: list (over environments) of lists with three samples produced by bare_min_sample
    """
    print("Generating faux dataset")
    units = UnrealUnits(scale=1.0)

    # Angle offsets relative to the base angle for each supported side.
    # "random" is handled separately; unknown sides leave the base angle unchanged.
    side_offsets = {
        "infront": 0.0,
        "behind": math.pi,
        "left": -(math.pi / 2),
        "right": math.pi / 2,
    }

    dataset = []
    for env_id in eval_envs:
        config = load_env_config(env_id)
        template = load_template(env_id)
        path = load_path(env_id)

        landmark_radii = config["radius"]
        start_pt = np.asarray(config["startPos"])
        second_pt = np.asarray(config["startHeading"])

        # Pick a random target landmark
        choice_id = random.choice(list(range(len(config["landmarkName"]))))
        target_lm_pos = np.asarray([config["xPos"][choice_id], config["zPos"][choice_id]])
        landmark_dir = target_lm_pos - start_pt

        side = template["side"]
        base_theta = math.atan2(landmark_dir[1], landmark_dir[0]) + math.pi
        if side == "random":
            theta = random.random() * 2 * math.pi
        else:
            theta = base_theta + side_offsets.get(side, 0.0)

        radius = landmark_radii[choice_id]
        sample_point = np.array([
            target_lm_pos[0] + math.cos(theta) * radius,
            target_lm_pos[1] + math.sin(theta) * radius,
        ])

        waypoints = ((start_pt, False), (second_pt, False), (sample_point, True))
        segment_dataset = []
        for position, flag in waypoints:
            segment_dataset.append(bare_min_sample(DroneState(None, position), flag, env_id))
        dataset.append(segment_dataset)

    return dataset
def get_top_down_ground_truth_static_ego(env_id, start_idx, img_w, img_h, map_w, map_h):
    """
    Returns the ground-truth labels for the full path of an environment, transformed with the
    affine matrix computed from the start point and yaw at start_idx.

    Channel 0 holds the plotted path and channel 1 a dot at the final path point. Both channels
    are transformed, blurred with a gaussian filter and standardized separately.
    :param env_id: environment id
    :param start_idx: index along the path from which the start point and yaw are taken
    :param img_w: width of the output label image
    :param img_h: height of the output label image
    :param map_w: map width used to convert the path to image coordinates
    :param map_h: map height used to convert the path to image coordinates
    :return: float tensor of the concatenated path and end-point labels with a leading dimension of size 1
    """
    # Debug visualization is off by default: cv2.waitKey(0) blocks until a key is pressed,
    # which would stall every caller of this function.
    DEBUG = False

    path = load_path(env_id)

    start_pt, dir_yaw = tdd.get_start_pt_and_yaw(path, start_idx, map_w, map_h, 0)
    affine = tdd.get_affine_matrix(start_pt, dir_yaw, img_w, img_h)

    seg_labels = np.zeros([img_w, img_h, 2]).astype(float)
    path_in_img = cf_to_img(path, np.array([map_w, map_h]))

    gauss_sigma = map_w / 32

    seg_labels[:, :, 0] = tdd.plot_path_on_img(seg_labels[:, :, 0], path_in_img)
    if len(path_in_img) > 1:
        seg_labels[:, :, 1] = tdd.plot_dot_on_img(seg_labels[:, :, 1], path_in_img[-1], gauss_sigma)

    seg_labels_rot = tdd.apply_affine(seg_labels, affine, img_w, img_h)
    seg_labels_rot[:, :, 0] = gaussian_filter(seg_labels_rot[:, :, 0], gauss_sigma)
    seg_labels_rot[:, :, 1] = gaussian_filter(seg_labels_rot[:, :, 1], gauss_sigma)

    if DEBUG:
        cv2.imshow("l_traj", seg_labels_rot[:, :, 0])
        cv2.imshow("l_endpt", seg_labels_rot[:, :, 1])
        cv2.waitKey(0)

    # Standardize both channels separately (each has mean zero, unit variance)
    seg_labels_path = standardize_2d_prob_dist(seg_labels_rot[:, :, 0:1])
    seg_labels_endpt = standardize_2d_prob_dist(seg_labels_rot[:, :, 1:2])

    seg_labels_rot = np.concatenate((seg_labels_path, seg_labels_endpt), axis=0)

    seg_labels_t = torch.from_numpy(seg_labels_rot).unsqueeze(0).float()
    return seg_labels_t
def get_path_end_indices(env_id, entry):
    """
    For every instruction segment in an annotation entry, find the index of the path point
    closest to the segment's recorded end coordinate.
    :param env_id: environment id whose path is searched
    :param entry: dict with "instructions", "end_x", "end_z" and "moves" entries
    :return: list with one integer path index per instruction segment
    """
    num_segments = len(entry["instructions"])
    path = load_path(env_id)

    end_indices = []
    for seg_num in range(num_segments):
        end_point = np.asarray([entry["end_x"][seg_num], entry["end_z"][seg_num]])

        # Convert from Unity-specific to normalized coordinates
        # TODO FIXME: This check is a potential source of future bugs!
        # The real language data from unity includes ground truth moves. The templated one doesn't
        # Unity data stores end coordinate in unity coords, templated data in config coords.
        has_moves = len(entry["moves"]) > 0
        if has_moves:
            end_point = (end_point - [225, 225]) * (1000 / 50)

        closest = get_closest_point_idx(path, end_point)
        end_indices.append(int(closest))

    return end_indices
def evaluate_rollout(self, rollout):
    """
    Evaluate a rollout by the distance between the final drone position and the target end of the path.

    The target is the last path point when self.entire_trajectory is set, otherwise the end point
    of the rolled-out segment (clamped to the last path point). When self.save_images is set, the
    rollout is also plotted and saved in the results directory of the run.
    :param rollout: list of samples; the last one provides the metadata and the final state
    :return: ResultsLandmarkSide built from (success, end distance)
    """
    final_sample = rollout[-1]
    metadata = final_sample["metadata"]
    env_id = metadata["env_id"]
    seg_idx = metadata["seg_idx"]
    set_idx = metadata["set_idx"]

    # TODO: Allow multiple instruction sets / paths per env
    path = load_path(env_id)
    last_path_idx = len(path) - 1
    if self.entire_trajectory:
        path_end_idx = last_path_idx
    else:
        # Find the segment end index, making sure it doesn't run past the end of the path
        segment_end = self.all_i[env_id][set_idx]["instructions"][seg_idx]["end_idx"]
        path_end_idx = min(segment_end, last_path_idx)

    end_pos = np.asarray(final_sample["state"].get_pos())
    target_end_pos = np.asarray(path[path_end_idx])
    end_dist = np.linalg.norm(end_pos - target_end_pos)
    success = end_dist < DEFAULT_PASSING_DISTANCE

    if metadata["pol_action"][3] > 0.5:
        who_stopped = "Policy Stopped"
    elif metadata["ref_action"][3] > 0.5:
        who_stopped = "Oracle Stopped"
    else:
        who_stopped = "Veered Off"

    result = "Success" if success else "Fail"
    texts = [who_stopped, result, "run:" + self.run_name]

    print(seg_idx, result)

    if self.save_images:
        results_dir = get_results_dir(self.run_name, makedir=True)
        print("Results dir: ", results_dir)
        self.presenter.plot_paths(rollout, interactive=False, texts=texts,
                                  entire_trajectory=self.entire_trajectory)
        base_name = str(env_id) + "_" + str(set_idx) + "_" + str(seg_idx)
        filename = os.path.join(results_dir, base_name)
        if self.custom_instr is not None:
            instruction = metadata["instruction"]
            filename += "_" + instruction[:24] + "_" + instruction[-16:]
        self.presenter.save_plot(filename)
        # NOTE(review): nesting reconstructed from whitespace-collapsed source - confirm that
        # save_results() belongs inside the save_images branch
        self.save_results()

    return ResultsLandmarkSide(success, end_dist)
def set_environment(self, env_id, instruction_set=None, fast=False):
    """
    Switch the simulation to env_id. Causes the environment configuration from
    configs/configs/random_config_<env_id>.json to be loaded and landmarks arranged in the simulator.
    Also resets the segment bookkeeping and, if a path exists for the environment, creates the
    reward objects for it.
    :param env_id: integer ID
    :param instruction_set: Instruction set to follow for displaying instructions
    :param fast: Set to True to skip a delay at a risk of environment not loading before subsequent function calls
    :return:
    """
    self.env_id = env_id

    self.drone.set_current_env_id(env_id, self.instance_id)
    self.drone.reset_environment()

    # This is necessary to allow the new frame to be rendered with the new pomdp, so that the drone doesn't
    # accidentally see the old pomdp at the start
    if not fast:
        time.sleep(0.1)

    self.current_segment = 0
    self.seg_start = 0
    self.seg_end = 0

    self.path = load_path(env_id)
    if self.path is not None:
        self.reward = FollowPathReward(self.path)
        self.reward_shaping = ImitationReward(self.path)
        self.seg_end = len(self.path) - 1

    self.instruction_set = instruction_set
    if instruction_set is None:
        return

    # NOTE(review): if the path is None, self.reward is not (re)created above - confirm that an
    # instruction set is never passed for an environment without a path
    self.reward.set_current_segment(self.seg_start, self.seg_end)

    if len(instruction_set) == 0:
        print("OOOPS! Instruction set of length 0!" + str(env_id))
        return

    self.seg_end = instruction_set[self.current_segment]["end_idx"]

    # Clamp a segment end that runs past the end of the path
    if self.seg_end >= len(self.path):
        print("OOOPS! For env " + str(env_id) + " segment " +
              str(self.current_segment) + " end oob: " + str(self.seg_end))
        self.seg_end = len(self.path) - 1
def evaluate_rollout(self, rollout):
    """
    Evaluate a rollout by whether the drone ended within the region of the landmark and on the
    correct side of it.

    When self.save_images is set, the rollout is also plotted and saved in the results directory.
    :param rollout: list of samples; the last one provides the metadata and the final state
    :return: ResultsLandmarkSide built from (success, distance to the last path point,
        whether the final position is within LANDMARK_REGION_RADIUS of the landmark)
    """
    final_sample = rollout[-1]
    metadata = final_sample["metadata"]
    env_id = metadata["env_id"]
    seg_idx = metadata["seg_idx"]
    set_idx = metadata["set_idx"]

    # TODO: Allow multiple templates / instructions per env
    path = load_path(env_id)

    end_pos = np.asarray(final_sample["state"].get_pos())
    landmark_pos = self.get_landmark_pos(env_id)
    target_end_pos = np.asarray(path[-1])

    end_goal_dist = np.linalg.norm(end_pos - target_end_pos)
    end_lm_dist = np.linalg.norm(end_pos - landmark_pos)
    correct_landmark_region = end_lm_dist < LANDMARK_REGION_RADIUS
    correct_quadrant = self.correct_side(rollout, env_id)

    if metadata["pol_action"][3] > 0.5:
        who_stopped = "Policy Stopped"
    elif metadata["ref_action"][3] > 0.5:
        who_stopped = "Oracle Stopped"
    else:
        who_stopped = "Veered Off"

    success = correct_landmark_region and correct_quadrant

    if correct_landmark_region:
        side_txt = "Correct landmark"
    else:
        side_txt = "Wrong landmark"
    result = "Success" if success else "Fail"
    # Built but currently not passed to the plot (plot_paths receives an empty list)
    texts = [who_stopped, result, side_txt, "run:" + self.run_name]

    if self.save_images:
        results_dir = get_results_dir(self.run_name, makedir=True)
        self.presenter.plot_paths(rollout, interactive=False, texts=[])
        base_name = str(env_id) + "_" + str(set_idx) + "_" + str(seg_idx)
        self.presenter.save_plot(os.path.join(results_dir, base_name))
        # NOTE(review): nesting reconstructed from whitespace-collapsed source - confirm that
        # save_results() belongs inside the save_images branch
        self.save_results()

    return ResultsLandmarkSide(success, end_goal_dist, correct_landmark_region)
def provider_goal_pos_map(segment_data, data):
    """
    Data provider that gives the goal location for every timestep of a segment dataset.
    The goal of a timestep is the path point at the end index of that timestep's instruction
    segment (or the last path point if the end index is past the end of the path), converted
    with __get_goal_location_airsim. Timesteps whose sample is None get the goal [0.0, 0.0].
    :param segment_data: segment dataset for which to provide data
    :param data: not used by this provider
    :return: [("goal_loc", goal_loc)] - goal_loc is a float tensor with one row per timestep
    """
    env_id = segment_data[0]["metadata"]["env_id"]
    path = load_path(env_id)

    goals = []
    for timestep in range(len(segment_data)):
        sample = segment_data[timestep]
        if sample is None:
            goals.append(np.asarray([0.0, 0.0]))
            continue

        metadata = sample["metadata"]
        seg = get_instruction_segment(env_id, metadata["set_idx"], metadata["seg_idx"])

        end_idx = seg["end_idx"]
        end_pt = path[end_idx] if end_idx < len(path) else path[-1]
        goals.append(__get_goal_location_airsim(end_pt))

    goal_loc_t = torch.from_numpy(np.asarray(goals)).float()
    return [("goal_loc", goal_loc_t)]
def dyn_gt_test():
    """
    Manual visual test of the dynamic ground truth generators.

    For every segment of the first instruction set of environments 0-9, computes the dynamic
    ground truth (regular and smooth variants) from a randomly offset start position and plots
    both next to the original segment path.
    """
    presenter = Presenter()
    train_instr, dev_instr, test_instr, corpus = get_all_instructions()

    all_instr = dict(train_instr)
    all_instr.update(dev_instr)
    all_instr.update(test_instr)

    for env_id in range(10):
        path = load_path(env_id)
        for seg in all_instr[env_id][0]["instructions"]:
            start_idx = seg["start_idx"]
            end_idx = seg["end_idx"]

            offset = random.randint(10, 100)
            start_pose = Pose(path[start_idx] - offset, 0)

            # Skip empty segments
            if end_idx - start_idx <= 0:
                continue

            offset = random.randint(10, 100)
            seg_path = path[start_idx:end_idx]
            new_path = get_dynamic_ground_truth(seg_path, (path[start_idx] - offset))
            new_path1 = get_dynamic_ground_truth_smooth(
                path[start_idx:end_idx], (path[start_idx] - offset))
            presenter.plot_path(env_id, [path[start_idx:end_idx], new_path, new_path1])
def provider_rot_top_down_images(segment_data, data):
    """
    Data provider that generates, for every timestep of a segment, the top-down environment image
    and the trajectory labels transformed according to the start pose of the timestep's
    instruction segment. Processing stops at the first timestep whose metadata is None.
    :param segment_data: segment dataset; its metadata list provides env_id, set_idx and seg_idx
    :param data: not used by this provider
    :return: [("top_down_images", images), ("traj_ground_truth", labels)] - both concatenated over
        timesteps along dimension 0; the labels are max-pooled with kernel size 8
    """
    map_size = 256
    img_size = 512
    pool_size = 8

    env_id = segment_data.metadata[0]["env_id"]
    path = load_path(env_id)
    env_image = load_env_img(env_id, map_size, map_size)

    images = []
    labels = []
    for md in segment_data.metadata:
        if md is None:
            break

        instr_seg = get_instruction_segment(env_id, md["set_idx"], md["seg_idx"])
        start_idx = instr_seg["start_idx"]
        end_idx = instr_seg["end_idx"]

        start_pt, dir_yaw = tdd.get_start_pt_and_yaw(path, start_idx, map_size, map_size, 0)
        affine = tdd.get_affine_matrix(start_pt, dir_yaw, img_size, img_size)

        images.append(
            tdd.gen_top_down_image(env_image, affine, img_size, img_size, map_size, map_size))

        seg_labels_t = tdd.gen_top_down_labels(
            path[start_idx:end_idx], affine, img_size, img_size, map_size, map_size, True, True)
        labels.append(F.max_pool2d(Variable(seg_labels_t), pool_size).data)

    tdimg_t = torch.cat(images, dim=0)
    tdlab_t = torch.cat(labels, dim=0)

    return [("top_down_images", tdimg_t), ("traj_ground_truth", tdlab_t)]
def get_item(self, env_id, set_idx, seg_idx):
    """
    Generate the data for one environment (or, in segment-level mode, for one segment of it).

    In segment-level mode (self.seg_level) only the segment seg_idx of instruction set set_idx is
    processed; otherwise all segments of instruction set 0 are processed and seg_idx is ignored.
    Segments for which no start point can be obtained are skipped.
    :param env_id: environment id
    :param set_idx: instruction set number
    :param seg_idx: segment index (only used in segment-level mode)
    :return: dict of lists with one entry per processed segment. Besides the image, instruction,
        label and affine lists it contains the auxiliary landmark labels merged in via
        dictlist_append and, if self.include_instr_negatives is set, "neg_instr".
    """
    path = load_path(env_id)
    env_image = load_env_img(env_id, self.map_w, self.map_h)
    self.latest_img_dbg = env_image

    data = {
        "images": [],
        "instr": [],
        "traj_labels": [],
        "affines_g_to_s": [],
        "lm_pos": [],
        "lm_indices": [],
        "lm_mentioned": [],
        "lm_visible": [],
        "set_idx": [],
        "seg_idx": [],
        "env_id": []
    }

    if self.include_instr_negatives:
        data["neg_instr"] = []

    # Pair every segment with its true index in the instruction set. Enumerating the
    # single-element list in segment-level mode would always report index 0 and clobber the
    # seg_idx argument, which corrupted the recorded "seg_idx" and the negative instructions.
    if self.seg_level:
        indexed_segments = [(seg_idx, self.all_instr[env_id][set_idx]["instructions"][seg_idx])]
    else:
        indexed_segments = list(enumerate(self.all_instr[env_id][0]["instructions"]))

    for cur_seg_idx, seg in indexed_segments:
        start_idx = seg["start_idx"]
        end_idx = seg["end_idx"]
        instruction = seg["instruction"]

        start_pt, dir_yaw = get_start_pt_and_yaw(path, start_idx, self.map_w, self.map_h, self.yaw_rand_range)
        if start_pt is None:
            continue
        affine = get_affine_matrix(start_pt, dir_yaw, self.img_w, self.img_h)

        if DEBUG:
            # Draw the segment path on a fresh copy of the environment image
            env_image = self.latest_img_dbg
            print("Start Pt: ", start_pt)
            print("Start Yaw: ", dir_yaw)
            path_img = cf_to_img(path, [env_image.shape[0], env_image.shape[1]])
            seg_path = path_img[start_idx:end_idx]
            env_image = env_image.copy()
            plot_path_on_img(env_image, seg_path)

        seg_img_t = gen_top_down_image(env_image, affine, self.img_w, self.img_h, self.map_w, self.map_h)
        seg_labels_t = gen_top_down_labels(path[start_idx:end_idx], affine, self.img_w, self.img_h,
                                           self.map_w, self.map_h, self.incl_path, self.incl_endpoint)
        instruction_t = self.gen_instruction(instruction)
        aux_label = self.gen_lm_aux_labels(env_id, instruction, affine)

        if DEBUG:
            cv2.waitKey(0)

        if self.include_instr_negatives:
            neg_instruction_t = self.gen_neg_instructions(env_id, cur_seg_idx)
            data["neg_instr"].append(neg_instruction_t)

        data["images"].append(seg_img_t)
        data["instr"].append(instruction_t)
        data["traj_labels"].append(seg_labels_t)
        data["affines_g_to_s"].append(affine)
        data["env_id"].append(env_id)
        data["set_idx"].append(set_idx)
        data["seg_idx"].append(cur_seg_idx)
        data = dictlist_append(data, aux_label)

    return data
def get_top_down_ground_truth_dynamic_global(env_id, start_idx, end_idx, drone_pos_as, img_w, img_h, map_w, map_h):
    """
    Returns the ground-truth label oriented in the global map frame, computed from the dynamic
    ground truth path that get_dynamic_ground_truth_v2 generates for the current drone position.

    Channel 0 holds the plotted path and channel 1 a dot at the last path point; both are blurred
    with a gaussian filter and standardized separately.
    :param env_id: environment id
    :param start_idx: path index where the segment starts
    :param end_idx: path index where the segment ends (exclusive)
    :param drone_pos_as: drone position, converted with UnrealUnits.pos3d_from_as
    :param img_w: width of the label image
    :param img_h: height of the label image
    :param map_w: map width used to convert the path to image coordinates
    :param map_h: map height used to convert the path to image coordinates
    :return: float tensor of the concatenated path and end-point labels with a leading dimension of size 1
    """
    PROFILE = False
    DEBUG = False
    prof = SimpleProfiler(False, PROFILE)

    segment_path = load_path(env_id, anno=True)[start_idx:end_idx]
    prof.tick("load_path")

    drone_pos_cf = UnrealUnits(1.0).pos3d_from_as(drone_pos_as)
    gt_dynamic = get_dynamic_ground_truth_v2(segment_path, drone_pos_cf[:2])
    prof.tick("gen_gt_path")

    labels = np.zeros([img_w, img_h, 2]).astype(float)
    path_in_img = cf_to_img(gt_dynamic, np.array([map_w, map_h]))
    gauss_sigma = map_w / 96

    labels[:, :, 0] = tdd.plot_path_on_img(labels[:, :, 0], path_in_img)
    if len(path_in_img) > 1:
        labels[:, :, 1] = tdd.plot_dot_on_img(labels[:, :, 1], path_in_img[-1], gauss_sigma)
    prof.tick("plot_path")

    for channel in (0, 1):
        labels[:, :, channel] = gaussian_filter(labels[:, :, channel], gauss_sigma)

    # Standardize both channels separately (each has mean zero, unit variance)
    seg_labels_path = standardize_2d_prob_dist(labels[:, :, 0:1])
    seg_labels_endpt = standardize_2d_prob_dist(labels[:, :, 1:2])
    prof.tick("process_img")

    if DEBUG:
        gt_path_in_img = cf_to_img(segment_path, np.asarray([map_w, map_h]))
        dbg_labels_gt = np.zeros([img_w, img_h, 1])
        dbg_labels_gt[:, :, 0] = tdd.plot_path_on_img(dbg_labels_gt[:, :, 0], gt_path_in_img)
        Presenter().show_image(dbg_labels_gt, "dbg", torch=False, waitkey=10, scale=4)
        Presenter().show_image(torch.from_numpy(seg_labels_path), "l_path", torch=True, waitkey=10, scale=4)
        Presenter().show_image(torch.from_numpy(seg_labels_endpt), "l_endp", torch=True, waitkey=100, scale=4)

    stacked = np.concatenate((seg_labels_path, seg_labels_endpt), axis=0)
    seg_labels_t = torch.from_numpy(stacked).unsqueeze(0).float()
    prof.tick("prep_out")
    prof.print_stats()

    return seg_labels_t
def ground_terms(word2id, clustered_corpus, landmark_names, train_instructions):
    """
    Ground terms to landmarks using the mutual information between a term being mentioned in an
    instruction and a landmark being close to the corresponding path segment.

    A word is any word in an instruction. Terms are the keys of clustered_corpus; multiple words
    (misspellings) can map to a single term.
    :param word2id: word to id mapping (only its length is taken)
    :param clustered_corpus: dict mapping each term to the list of corpus words assumed to be
        (mis)spellings of it
    :param landmark_names: list of landmark names, indexed by landmark index
    :param train_instructions: dict mapping env id to instruction sets; the first set is used
    :return: (term_meanings, word2term) - term_meanings maps each term to a dict with its grounded
        "landmarks" (sorted by decreasing mutual information), the matching "mutual_info" values
        and its "term_prob"; word2term maps every word to its term
    """
    num_terms = len(clustered_corpus)
    vocab_size = len(word2id)
    num_landmarks = len(landmark_names)

    # This is gonna be the new word2id, once we start using the thesaurus
    id2term = dict(enumerate(sorted(clustered_corpus.keys())))
    term2id = {term: term_id for term_id, term in id2term.items()}

    # Number of times each term appears in an instruction
    term_counts = np.zeros(num_terms)
    # Number of times each landmark appears near a segment path
    landmark_counts = np.zeros(num_landmarks)
    # The number of times each term and landmark combination appears in the instruction and near the path
    joint_counts = np.zeros((num_terms, num_landmarks))
    # The number of total segments that were considered
    num_segments = 0

    landmark_indices = get_landmark_name_to_index()

    # Inverse the clusters so that we can efficiently map each word in each instruction to its cluster core
    word2term = {}
    for real_word, misspellings in clustered_corpus.items():
        for misspelling in misspellings:
            word2term[misspelling] = real_word

    # Count landmark and word occurences and co-occurences
    for env_id, instruction_sets in train_instructions.items():
        path = load_path(env_id)
        env_config = load_env_config(env_id)

        for segment in instruction_sets[0]["instructions"]:
            nearby_landmarks = close_landmark_names(
                env_config, path, segment["start_idx"], segment["end_idx"])
            nearby_lm_indices = [landmark_indices[lm] for lm in nearby_landmarks]

            words = split_instruction(clean_instruction(segment["instruction"]))
            term_ids = [term2id[term] for term in words_to_terms(words, word2term)]

            for term_id in term_ids:
                term_counts[term_id] += 1

            for lm_idx in nearby_lm_indices:
                landmark_counts[lm_idx] += 1
                for term_id in term_ids:
                    joint_counts[term_id][lm_idx] += 1

            num_segments += 1

    # Both marginals are expanded to dimensions 0: terms, 1: landmarks
    term_prob = np.expand_dims(term_counts / num_segments, 1).repeat(num_landmarks, 1)
    landmark_prob = np.expand_dims(landmark_counts / num_segments, 0).repeat(num_terms, 0)
    joint_prob = joint_counts / num_segments

    mutual_info_factor = joint_prob / (landmark_prob * term_prob + 1e-27)
    mutual_info = joint_prob * np.log(mutual_info_factor + 1e-27)
    # The above line is the correct formula for mutual information. For our case, the plain log factor might be
    # better? The mutual information is higher for common words than uncommon ones. We might prefer the opposite
    # effect. On the other hand, uncommon words are more likely to spuriously correlate with landmarks, which
    # will cause a less reliable corpus.

    # Ground each term and produce the thesaurus
    term_meanings = {}
    common_words = []

    for term_id in range(num_terms):
        term = id2term[term_id]
        term_info = mutual_info[term_id]

        above_threshold = [
            lm_idx for lm_idx in range(num_landmarks)
            if term_info[lm_idx] > MUTUAL_INFO_THRESHOLD
        ]
        candidate_names = [landmark_names[lm_idx] for lm_idx in above_threshold]
        candidate_infos = np.asarray([term_info[lm_idx] for lm_idx in above_threshold])

        # Order the grounded landmarks by decreasing mutual information
        descending = list(np.argsort(candidate_infos))[::-1]
        grounded_lm_names = [candidate_names[pos] for pos in descending]
        mutual_infos = [candidate_infos[pos] for pos in descending]

        # If the word is too common to be referring to a landmark, ignore it
        this_term_prob = term_prob[term_id][0]
        if this_term_prob > MAX_TERM_PROB:
            common_words.append(term)
            grounded_lm_names = []
            mutual_infos = []

        term_meanings[term] = {
            "landmarks": grounded_lm_names,
            "mutual_info": mutual_infos,
            "term_prob": this_term_prob
        }

    for term, meaning in term_meanings.items():
        if len(meaning["landmarks"]) > 0:
            print(term, meaning)

    print("Ignored groundings for these common words: " + str(common_words))

    return term_meanings, word2term
def __call__(self, images, states, segment_data, mask):
    """
    Compute the auxiliary labels (landmark and goal positions) for every timestep of a segment.

    :param images: image batch; dimension 0 is the timestep, dimensions 2 and 3 give the image
        size passed to the projector
    :param states: per-timestep states; columns 9:12 are read as the camera position and
        columns 12:16 as the camera rotation
    :param segment_data: segment dataset; the env_id is taken from its first metadata entry
    :param mask: per-timestep mask; timesteps where it equals 0 are skipped
    :return: tensor of size (timesteps x number of landmark indices x 14), laid out as described
        in the comment below
    """
    num_timesteps = images.size(0)
    projector = PinholeProjector(img_x=images.size(3), img_y=images.size(2))

    env_id = segment_data.metadata[0]["env_id"]
    conf_json = load_env_config(env_id)
    all_landmark_indices = get_landmark_name_to_index()
    landmark_names, landmark_indices, landmark_pos = get_landmark_locations_airsim(conf_json)

    path_array = load_path(env_id)
    goal_loc = self.__get_goal_location_airsim(path_array)

    # Traj length x 64 landmarks x 14
    # 0-5: Present landmarks data
    #   0 - landmark present in img
    #   1-2 - landmark pix_x | pix_y
    #   3-5 - landmark world coords m_x | m_y
    # 6-7: Template data
    #   6 - landmark_mentioned index
    #   7 - mentioned_side index
    #   8 - landmark mentioned
    # 9-13: Goal data
    #   9-10 - goal_x_pix | goal_y_pix
    #   11-12 - goal_x | goal_y (world)
    #   13 - goal visible
    aux_labels = torch.zeros((num_timesteps, len(all_landmark_indices), 14))

    # Store goal location in airsim coordinates
    goal_world_slot = aux_labels[:, :, 11:13]
    aux_labels[:, :, 11:13] = torch.from_numpy(goal_loc[0:2]).unsqueeze(0).unsqueeze(0).expand_as(goal_world_slot)

    # Store the world position of every landmark that exists in this environment
    for lm_idx, lm_world in zip(landmark_indices, landmark_pos):
        aux_labels[:, lm_idx, 3:6] = torch.from_numpy(lm_world).unsqueeze(0).clone().repeat(
            aux_labels.size(0), 1, 1)

    for timestep in range(num_timesteps):
        if mask[timestep] == 0:
            continue

        cam_pos = states[timestep, 9:12]
        cam_rot = states[timestep, 12:16]

        goal_in_img, _goal_in_cam, _status = projector.world_point_to_image(cam_pos, cam_rot, goal_loc)
        if goal_in_img is not None:
            goal_pix_slot = aux_labels[timestep, :, 9:11]
            aux_labels[timestep, :, 9:11] = torch.from_numpy(goal_in_img[0:2]).unsqueeze(0).expand_as(goal_pix_slot)
            aux_labels[timestep, :, 13] = 1.0

        for lm_idx, lm_world in zip(landmark_indices, landmark_pos):
            lm_in_img, _lm_in_cam, _status = projector.world_point_to_image(cam_pos, cam_rot, lm_world)
            # This is None if the landmark is behind the camera.
            if lm_in_img is None:
                continue
            aux_labels[timestep, lm_idx, 0] = 1.0
            aux_labels[timestep, lm_idx, 1:3] = torch.from_numpy(lm_in_img[0:2])

    return aux_labels
def __getitem__(self, idx):
    """
    Generate the images, instructions, labels and auxiliary labels for dataset item idx.

    In segment-level mode (self.seg_level) the item is a single segment taken from
    self.seg_list; otherwise it is an environment from self.env_list and all segments of its
    instruction set 0 are processed. Segments for which no affine matrix can be computed are
    skipped.
    :param idx: index into self.seg_list (segment-level mode) or self.env_list
    :return: list [input_images, input_instructions, label_images, aux_labels], each a list with
        one entry per processed segment
    """
    if self.seg_level:
        env_id = self.seg_list[idx][0]
        set_idx = self.seg_list[idx][1]
        seg_idx = self.seg_list[idx][2]
    else:
        env_id = self.env_list[idx]

    print("top_down_dataset_sm __getitem__ load_env_config")
    env_conf_json = load_env_config(env_id)
    landmark_names, landmark_indices, landmark_positions = get_landmark_locations_airsim(env_conf_json)

    top_down_image = load_env_img(env_id)

    path = load_path(env_id)

    img_x = top_down_image.shape[0]
    img_y = top_down_image.shape[1]

    path_in_img_coords = self.cf_to_img(img_x, path)
    landmark_pos_in_img = self.as_to_img(img_x, np.asarray(landmark_positions)[:, 0:2])
    self.pos_rand_image = self.pos_rand_range * img_x

    input_images = []
    input_instructions = []
    label_images = []
    aux_labels = []

    # Pair every segment with its true index in the instruction set. Enumerating the
    # single-element list in segment-level mode would always report index 0, so the similar
    # instruction lookup below would be done for the wrong segment.
    if self.seg_level:
        indexed_segments = [(seg_idx, self.all_instr[env_id][set_idx]["instructions"][seg_idx])]
    else:
        indexed_segments = list(enumerate(self.all_instr[env_id][0]["instructions"]))

    for cur_seg_idx, seg in indexed_segments:
        start_idx = seg["start_idx"]
        end_idx = seg["end_idx"]
        instruction = seg["instruction"]

        # TODO: Check for overflowz
        seg_path = path_in_img_coords[start_idx:end_idx]
        seg_img = top_down_image.copy()

        # TODO: Validate the 0.5 choice, should it be 2?
        affine, cropsize = self.get_affine_matrix(seg_path, 0, [int(img_x / 2), int(img_y / 2)], 0.5)
        if affine is None:
            continue
        seg_img_rot = self.apply_affine(seg_img, affine, cropsize)

        seg_labels = np.zeros_like(seg_img[:, :, 0:1]).astype(float)
        seg_labels = self.plot_path_on_img(seg_labels, seg_path)
        seg_labels = gaussian_filter(seg_labels, 4)
        seg_labels_rot = self.apply_affine(seg_labels, affine, cropsize)
        seg_labels_rot = self.normalize_0_1(seg_labels_rot)

        # Change to true to visualize the paths / labels
        if False:
            cv2.imshow("rot_img", seg_img_rot)
            cv2.imshow("seg_labels", seg_labels_rot)
            rot_viz = seg_img_rot.astype(np.float64) / 512
            rot_viz[:, :, 0] += seg_labels_rot.squeeze()
            cv2.imshow("rot_viz", rot_viz)
            cv2.waitKey(0)

        tok_instruction = tokenize_instruction(instruction, self.word2token)
        instruction_t = torch.LongTensor(tok_instruction).unsqueeze(0)

        # Get landmark classification labels
        landmark_pos_in_seg_img = self.apply_affine_on_pts(landmark_pos_in_img, affine)

        # Down-size images and labels if requested by the model
        if self.img_scale != 1.0:
            seg_img_rot = transform.resize(
                seg_img_rot,
                [seg_img_rot.shape[0] * self.img_scale,
                 seg_img_rot.shape[1] * self.img_scale], mode="constant")
            seg_labels_rot = transform.resize(
                seg_labels_rot,
                [seg_labels_rot.shape[0] * self.img_scale,
                 seg_labels_rot.shape[1] * self.img_scale], mode="constant")
            landmark_pos_in_seg_img = landmark_pos_in_seg_img * self.img_scale

        seg_img_rot = standardize_image(seg_img_rot)
        seg_labels_rot = standardize_image(seg_labels_rot)
        seg_img_t = torch.from_numpy(seg_img_rot).unsqueeze(0).float()
        seg_labels_t = torch.from_numpy(seg_labels_rot).unsqueeze(0).float()
        landmark_pos_t = torch.from_numpy(landmark_pos_in_seg_img).unsqueeze(0)
        landmark_indices_t = torch.LongTensor(landmark_indices).unsqueeze(0)

        # A landmark is kept only if both of its coordinates are above 0 and below the
        # size of dimension 2 of the image tensor
        mask1 = torch.gt(landmark_pos_t, 0)
        mask2 = torch.lt(landmark_pos_t, seg_img_t.size(2))
        mask = mask1 * mask2
        mask = mask[:, :, 0] * mask[:, :, 1]

        landmark_pos_t = torch.masked_select(
            landmark_pos_t, mask.unsqueeze(2).expand_as(landmark_pos_t)).view([-1, 2])
        landmark_indices_t = torch.masked_select(landmark_indices_t, mask).view([-1])

        mentioned_names, mentioned_indices = get_mentioned_landmarks(self.thesaurus, instruction)
        mentioned_labels_t = empty_float_tensor(list(landmark_indices_t.size())).long()
        for i, landmark_idx_present in enumerate(landmark_indices_t):
            if landmark_idx_present in mentioned_indices:
                mentioned_labels_t[i] = 1

        aux_label = {
            "landmark_pos": landmark_pos_t,
            "landmark_indices": landmark_indices_t,
            "landmark_mentioned": mentioned_labels_t,
            "visible_mask": mask,
        }

        if self.include_instr_negatives:
            # If we are to be using similar instructions according to the json file, then
            # initialize choices with similar instructions. Otherwise let choices be empty, and they will
            # be filled in the following lines.
            if self.instr_negatives_similar_only:
                choices = self.similar_instruction_map[str(env_id)][str(cur_seg_idx)]
            else:
                choices = []

            # If there are no similar instructions to this instruction, pick a completely random instruction
            while len(choices) == 0:
                env_options = list(self.similar_instruction_map.keys())
                random_env = random.choice(env_options)
                seg_options = list(self.similar_instruction_map[random_env].keys())
                if len(seg_options) == 0:
                    continue
                random_seg = random.choice(seg_options)
                choices = self.similar_instruction_map[random_env][random_seg]

            pick = random.choice(choices)
            picked_env = pick["env_id"]
            picked_seg = pick["seg_idx"]
            picked_set = pick["set_idx"]
            picked_instruction = self.all_instr[picked_env][picked_set]["instructions"][picked_seg]["instruction"]
            tok_fake_instruction = tokenize_instruction(picked_instruction, self.word2token)
            aux_label["negative_instruction"] = torch.LongTensor(tok_fake_instruction).unsqueeze(0)

        input_images.append(seg_img_t)
        input_instructions.append(instruction_t)
        label_images.append(seg_labels_t)
        aux_labels.append(aux_label)

    return [input_images, input_instructions, label_images, aux_labels]
def roll_out_on_env(self, params, instructions_set, set_idx, only_seg_idx=None, custom_instr=None):
    """
    Roll the policy out on the instruction segments of a single environment and
    collect one sample per executed step.

    Only segments with end_idx - start_idx >= 2 are rolled out. For each of them
    the reference policy (oracle) and the learned policy are both queried at every
    step, one of the two actions is executed in the environment, and the result is
    recorded.

    :param params: roll-out configuration. This method reads its policy, ref_policy,
        horizon, switch_prob, threshold_strategy, steps_to_kill, segment_level,
        run_name, first_segment_only and the plotting/saving flags, and calls its
        initPolicyContext / setCurrentSegment / shouldReset* / isSegmentLevel /
        getFlag / getSave*Path methods.
    :param instructions_set: dict with key "env" (environment id) and key
        "instructions" (iterable of segment dicts that provide "instruction",
        "start_idx" and "end_idx").
    :param set_idx: index of the instruction set; written to the run metadata and
        stored in every sample.
    :param only_seg_idx: if not None, every segment whose index differs is skipped.
    :param custom_instr: if not None, replaces the instruction string of every
        segment that is rolled out (debugging aid).
    :return: if params.isSegmentLevel() is false, a flat list of sample dicts;
        otherwise a list with one list of sample dicts per rolled-out segment.
    """
    env_dataset = []
    failed = False

    env_id = instructions_set["env"]
    self.env.set_environment(
        env_id, instruction_set=instructions_set['instructions'])
    path = load_path(env_id)
    params.initPolicyContext(env_id, path)

    # Module whose attributes are used below as globally readable roll-out metadata.
    import rollout.run_metadata as md

    segments = list(instructions_set['instructions'])

    # Keep only segments spanning at least 2 path indices, paired with their
    # index in the original (unfiltered) segment list.
    valid_segments = [
        (segments[i], i) for i in range(len(segments))
        if segments[i]["end_idx"] - segments[i]["start_idx"] >= 2
    ]

    # Nothing to roll out: the loop below is skipped and an empty dataset is returned.
    if len(valid_segments) == 0:
        print("Ding dong!")

    first_seg = True

    # For recurrent policy, we need to explicitly start a segment and reset the LSTM state
    # TODO: Make sure this still works for the older non-NL model
    params.policy.start_sequence()

    for segment, seg_idx in valid_segments:
        if only_seg_idx is not None and seg_idx != only_seg_idx:
            print("Skipping seg: " + str(seg_idx) + " as not requested")
            continue

        # In segment-level mode the policy sequence is restarted for every segment.
        if params.segment_level:
            params.policy.start_sequence()

        segment_dataset = []

        # Decide when to switch policies. The threshold is handed to
        # self.choose_action together with the step number.
        switch_threshold = params.horizon + 1  # Never switch policies by default
        do_switch = random.uniform(0, 1) < params.switch_prob
        if do_switch and params.threshold_strategy == SwitchThresholdStrategy.UNIFORM:
            switch_threshold = random.uniform(0, params.horizon)

        string_instruction, end_idx, start_idx = segment[
            "instruction"], segment["end_idx"], segment["start_idx"]

        # Manual instruction override to allow rolling out arbitrary instructions for debugging
        if custom_instr is not None:
            print("REPLACED: ", string_instruction)
            string_instruction = custom_instr
        print("INSTRUCTION:", string_instruction)

        # Set some global parameters that can be accessed by the model and other parts of the system
        md.IS_ROLLOUT = True
        md.RUN_NAME = params.run_name
        md.ENV_ID = env_id
        md.SET_IDX = set_idx
        md.SEG_IDX = seg_idx
        md.START_IDX = start_idx
        md.END_IDX = end_idx
        md.INSTRUCTION = string_instruction

        # Optional policy hook: only called when the policy defines it.
        if hasattr(params.policy, "start_segment_rollout"):
            params.policy.start_segment_rollout()

        token_instruction = self.tokenize_string(string_instruction)

        # At the end of segment N, should we reset drone position to the start of segment N+1 or continue
        # rolling out seamlessly? The first rolled-out segment always resets, which is what
        # guarantees that `state` is bound before the step loop below.
        # NOTE(review): the source this was reconstructed from had its indentation collapsed;
        # the flag resets and the sleep are assumed to belong to this branch - confirm.
        if first_seg or params.shouldResetAlways() or (
                failed and params.shouldResetIfFailed()):
            state = self.env.reset(seg_idx)
            #instr_str = debug_untokenize_instruction(instruction)
            #Presenter().show_instruction(string_instruction.replace(" ", " "))
            failed = False
            first_seg = False
            sleep(sleepytime)

        # Tell the oracle which part of the path is currently being executed
        params.setCurrentSegment(start_idx, end_idx)

        step_num = 0
        total_reward = 0
        # If the path has been finished according to the oracle, allow rolling out STEPS_TO_KILL more steps
        # If we finish the segment, but don't stop, log the position at which we finish the segment
        oracle_finished_countdown = params.steps_to_kill

        # Finally the actual policy roll out on the path segment!
        while True:

            # Get oracle action (labels)
            ref_action = params.ref_policy.get_action(
                state, token_instruction)

            if ref_action is None or step_num == params.horizon:
                failed = True  # Either veered off too far, or ran out of time. Either way, we consider it a fail
                print("Failed segment")
                break

            # Get the policy action (actions to be rolled out)
            action = params.policy.get_action(
                state, token_instruction)  #, env_id=env_id)

            # Note: this exit does not set `failed`.
            if action is None:
                print("POLICY PRODUCED None ACTION")
                break

            # Choose which action to execute (reference or policy) based on the selected procedure
            exec_action = self.choose_action(params, step_num,
                                             switch_threshold, ref_action,
                                             action)

            # action = [vel_x, vel_y, vel_yaw] vel_y is unused currently. Execute the action in the pomdp
            state, reward, done = self.env.step(exec_action)

            total_reward += reward

            # Collect the data into a dataset
            # NOTE(review): "state" here is the state returned by env.step (i.e. after exec_action),
            # while ref_action / pol_action were computed from the state before the step -
            # confirm that this pairing is what consumers of the dataset expect.
            sample = {
                "instruction": string_instruction,
                "state": state,
                "ref_action": ref_action,
                "reward": reward,
                "done": done,
                "metadata": {
                    "seg_path": path[start_idx:end_idx + 1],
                    "path": path,
                    "env_id": env_id,
                    "set_idx": set_idx,
                    "seg_idx": seg_idx,
                    "start_idx": start_idx,
                    "end_idx": end_idx,
                    "action": exec_action,
                    "pol_action": action,
                    "ref_action": ref_action,
                    "instruction": string_instruction,
                    "flag": params.getFlag()
                }
            }

            segment_dataset.append(sample)
            # In env-level mode samples go straight into the flat result list;
            # in segment-level mode the whole segment list is appended after the loop.
            if not params.isSegmentLevel():
                env_dataset.append(sample)

            # Do visual feedback and logging
            if params.first_person:
                self.presenter.show_sample(state, exec_action, reward,
                                           string_instruction)
            if params.plot:
                self.presenter.plot_paths(segment_dataset,
                                          interactive=True)
            if params.save_samples:
                file_path = params.getSaveSamplesPath(
                    env_id, set_idx, seg_idx, step_num)
                self.presenter.save_sample(file_path, state, exec_action,
                                           reward, string_instruction)
            if params.show_action:
                self.presenter.show_action(ref_action, "ref_action")
                self.presenter.show_action(exec_action, "exec_action")

            # If the policy is finished, we stop. Otherwise the oracle should just keep outputting
            # examples that say that the policy should output finished at this point
            # (element 3 of an action is compared against 0.5 as the stop signal).
            if exec_action[3] > 0.5 and not params.shouldIgnorePolicyStop():
                print("Policy stop!")
                break

            # If oracle says we're finished, allow a number of steps before terminating.
            if ref_action[3] > 0.5:
                # First oracle stop of this segment: remember where the drone was.
                # The value is only consumed by the commented-out error tracker call below.
                if oracle_finished_countdown == params.steps_to_kill:
                    drone_pos_force_stop = state.get_pos()
                oracle_finished_countdown -= 1
                # Exact comparison: the forced stop fires only when the countdown hits 0.
                if oracle_finished_countdown == 0:
                    print("Oracle forced stop!")
                    break

            step_num += 1

        # Call the rollout end callback, so that the model can save any debugging information, such as feature maps
        if callable(getattr(params.policy, "on_rollout_end", None)):
            params.policy.on_rollout_end(env_id, set_idx, seg_idx)

        if params.isSegmentLevel():
            env_dataset.append(segment_dataset)

        # Plot the trajectories for error tracking
        # TODO: Plot entire envs not segment by segment
        if params.save_plots:
            if not params.isSegmentLevel():
                self.presenter.plot_paths(
                    env_dataset,
                    segment_path=path[start_idx:end_idx + 1],
                    interactive=False,
                    bg=True)
            self.presenter.save_plot(
                params.getSavePlotPath(env_id, set_idx, seg_idx))

        # Calculate end of segment error
        # Clamp end_idx to the last valid index of the path.
        if end_idx > len(path) - 1:
            end_idx = len(path) - 1

        # The reward is proportional to path length. Weigh it down, so that max reward is 1:
        # (seg_len and total_reward are currently only referenced by the disabled call below.)
        seg_len = end_idx - start_idx
        #self.error_tracker.add_sample(not failed, drone_pos_force_stop, state.get_pos(), path[end_idx],
        #                              path[end_idx - 1], total_reward, seg_len)

        if params.first_segment_only:
            print("Only running the first segment")
            break

        sleep(sleepytime)

    return env_dataset